be2net: Support UE recovery in BEx/Skyhawk adapters
[cascardo/linux.git] drivers/net/ethernet/emulex/benet/be_main.c
/*
 * Copyright (C) 2005 - 2016 Broadcom
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.  The full GNU General
 * Public License is included in this distribution in the file called COPYING.
 *
 * Contact Information:
 * linux-drivers@emulex.com
 *
 * Emulex
 * 3333 Susan Street
 * Costa Mesa, CA 92626
 */

#include <linux/prefetch.h>
#include <linux/module.h>
#include "be.h"
#include "be_cmds.h"
#include <asm/div64.h>
#include <linux/aer.h>
#include <linux/if_bridge.h>
#include <net/busy_poll.h>
#include <net/vxlan.h>

MODULE_VERSION(DRV_VER);
MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
MODULE_AUTHOR("Emulex Corporation");
MODULE_LICENSE("GPL");

/* num_vfs module param is obsolete.
 * Use sysfs method to enable/disable VFs.
 */
static unsigned int num_vfs;
module_param(num_vfs, uint, S_IRUGO);
MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");

static ushort rx_frag_size = 2048;
module_param(rx_frag_size, ushort, S_IRUGO);
MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds received data.");
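/* Presumably only 2048, 4096 and 8192 are sane values here; the module
 * init path is assumed to validate rx_frag_size and fall back to the
 * 2048 default for anything else.  Example: modprobe be2net rx_frag_size=4096
 */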

/* Per-module error detection/recovery workq shared across all functions.
 * Each function schedules its own work request on this shared workq.
 */
struct workqueue_struct *be_err_recovery_workq;

static const struct pci_device_id be_dev_ids[] = {
	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
	{ 0 }
};
MODULE_DEVICE_TABLE(pci, be_dev_ids);

/* Workqueue used by all functions for deferring cmd calls to the adapter */
struct workqueue_struct *be_wq;

/* UE Status Low CSR */
static const char * const ue_status_low_desc[] = {
	"CEV",
	"CTX",
	"DBUF",
	"ERX",
	"Host",
	"MPU",
	"NDMA",
	"PTC ",
	"RDMA ",
	"RXF ",
	"RXIPS ",
	"RXULP0 ",
	"RXULP1 ",
	"RXULP2 ",
	"TIM ",
	"TPOST ",
	"TPRE ",
	"TXIPS ",
	"TXULP0 ",
	"TXULP1 ",
	"UC ",
	"WDMA ",
	"TXULP2 ",
	"HOST1 ",
	"P0_OB_LINK ",
	"P1_OB_LINK ",
	"HOST_GPIO ",
	"MBOX ",
	"ERX2 ",
	"SPARE ",
	"JTAG ",
	"MPU_INTPEND "
};

/* UE Status High CSR */
static const char * const ue_status_hi_desc[] = {
	"LPCMEMHOST",
	"MGMT_MAC",
	"PCS0ONLINE",
	"MPU_IRAM",
	"PCS1ONLINE",
	"PCTL0",
	"PCTL1",
	"PMEM",
	"RR",
	"TXPB",
	"RXPP",
	"XAUI",
	"TXP",
	"ARM",
	"IPC",
	"HOST2",
	"HOST3",
	"HOST4",
	"HOST5",
	"HOST6",
	"HOST7",
	"ECRC",
	"Poison TLP",
	"NETC",
	"PERIPH",
	"LLTXULP",
	"D2P",
	"RCON",
	"LDMA",
	"LLTXP",
	"LLTXPB",
	"Unknown"
};

#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
				 BE_IF_FLAGS_BROADCAST | \
				 BE_IF_FLAGS_MULTICAST | \
				 BE_IF_FLAGS_PASS_L3L4_ERRORS)

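/* Free the DMA-coherent memory backing queue @q, if it was allocated */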
static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
{
	struct be_dma_mem *mem = &q->dma_mem;

	if (mem->va) {
		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
				  mem->dma);
		mem->va = NULL;
	}
}

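/* Allocate zeroed DMA-coherent memory for a queue of @len entries of
 * @entry_size bytes each; returns -ENOMEM on failure.
 */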
static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
			  u16 len, u16 entry_size)
{
	struct be_dma_mem *mem = &q->dma_mem;

	memset(q, 0, sizeof(*q));
	q->len = len;
	q->entry_size = entry_size;
	mem->size = len * entry_size;
	mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
				      GFP_KERNEL);
	if (!mem->va)
		return -ENOMEM;
	return 0;
}

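/* Enable/disable host interrupts by toggling the HOSTINTR bit of the
 * MEMBAR control register in PCI config space.
 */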
static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
{
	u32 reg, enabled;

	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
			      &reg);
	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;

	if (!enabled && enable)
		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
	else if (enabled && !enable)
		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
	else
		return;

	pci_write_config_dword(adapter->pdev,
			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
}

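/* Prefer the FW cmd for interrupt control; fall back to direct register
 * access if the cmd fails.  No-op on Lancer and when an EEH error is set.
 */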
static void be_intr_set(struct be_adapter *adapter, bool enable)
{
	int status = 0;

	/* On Lancer, interrupts can't be controlled via this register */
	if (lancer_chip(adapter))
		return;

	if (be_check_error(adapter, BE_ERROR_EEH))
		return;

	status = be_cmd_intr_set(adapter, enable);
	if (status)
		be_reg_intr_set(adapter, enable);
}

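/* Ring the RX queue doorbell to tell HW how many buffers were posted */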
static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
{
	u32 val = 0;

	if (be_check_error(adapter, BE_ERROR_HW))
		return;

	val |= qid & DB_RQ_RING_ID_MASK;
	val |= posted << DB_RQ_NUM_POSTED_SHIFT;

	wmb();
	iowrite32(val, adapter->db + DB_RQ_OFFSET);
}

static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
			  u16 posted)
{
	u32 val = 0;

	if (be_check_error(adapter, BE_ERROR_HW))
		return;

	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;

	wmb();
	iowrite32(val, adapter->db + txo->db_offset);
}

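/* Ring the EQ doorbell: optionally re-arm the EQ and clear the interrupt,
 * ack @num_popped events and program the re-arm-to-interrupt delay.
 */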
static void be_eq_notify(struct be_adapter *adapter, u16 qid,
			 bool arm, bool clear_int, u16 num_popped,
			 u32 eq_delay_mult_enc)
{
	u32 val = 0;

	val |= qid & DB_EQ_RING_ID_MASK;
	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);

	if (be_check_error(adapter, BE_ERROR_HW))
		return;

	if (arm)
		val |= 1 << DB_EQ_REARM_SHIFT;
	if (clear_int)
		val |= 1 << DB_EQ_CLR_SHIFT;
	val |= 1 << DB_EQ_EVNT_SHIFT;
	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
	iowrite32(val, adapter->db + DB_EQ_OFFSET);
}

void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
{
	u32 val = 0;

	val |= qid & DB_CQ_RING_ID_MASK;
	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
			DB_CQ_RING_ID_EXT_MASK_SHIFT);

	if (be_check_error(adapter, BE_ERROR_HW))
		return;

	if (arm)
		val |= 1 << DB_CQ_REARM_SHIFT;
	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
	iowrite32(val, adapter->db + DB_CQ_OFFSET);
}

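/* ndo_set_mac_address handler: program the new MAC via PMAC_ADD and only
 * commit it to netdev->dev_addr once the FW confirms it is active.
 */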
static int be_mac_addr_set(struct net_device *netdev, void *p)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct device *dev = &adapter->pdev->dev;
	struct sockaddr *addr = p;
	int status;
	u8 mac[ETH_ALEN];
	u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	/* Proceed only if the user-provided MAC differs from the
	 * currently active MAC
	 */
	if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
		return 0;

	/* if device is not running, copy MAC to netdev->dev_addr */
	if (!netif_running(netdev))
		goto done;

	/* The PMAC_ADD cmd may fail if the VF doesn't have the FILTMGMT
	 * privilege or if the PF did not provision the new MAC address.
	 * On BE3, this cmd always fails if the VF doesn't have the
	 * FILTMGMT privilege.  That failure is OK only if the PF has
	 * programmed the MAC for the VF.
	 */
	status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
				 adapter->if_handle, &adapter->pmac_id[0], 0);
	if (!status) {
		curr_pmac_id = adapter->pmac_id[0];

		/* Delete the old programmed MAC. This call may fail if the
		 * old MAC was already deleted by the PF driver.
		 */
		if (adapter->pmac_id[0] != old_pmac_id)
			be_cmd_pmac_del(adapter, adapter->if_handle,
					old_pmac_id, 0);
	}

	/* Only after querying the FW can we decide whether the new MAC
	 * was successfully activated
	 */
	status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
				       adapter->if_handle, true, 0);
	if (status)
		goto err;

	/* The MAC change did not take effect, either due to lack of
	 * privilege or because the PF did not pre-provision it.
	 */
	if (!ether_addr_equal(addr->sa_data, mac)) {
		status = -EPERM;
		goto err;
	}
done:
	ether_addr_copy(netdev->dev_addr, addr->sa_data);
	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
	return 0;
err:
	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
	return status;
}

/* BE2 supports only v0 cmd; BE3 uses v1 and later chips use v2 */
static void *hw_stats_from_cmd(struct be_adapter *adapter)
{
	if (BE2_chip(adapter)) {
		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;

		return &cmd->hw_stats;
	} else if (BE3_chip(adapter)) {
		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;

		return &cmd->hw_stats;
	} else {
		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;

		return &cmd->hw_stats;
	}
}

/* BE2 supports only v0 cmd; BE3 uses v1 and later chips use v2 */
static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
{
	if (BE2_chip(adapter)) {
		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);

		return &hw_stats->erx;
	} else if (BE3_chip(adapter)) {
		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);

		return &hw_stats->erx;
	} else {
		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);

		return &hw_stats->erx;
	}
}

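/* Copy the FW-reported v0 stats into the chip-agnostic be_drv_stats */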
static void populate_be_v0_stats(struct be_adapter *adapter)
{
	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
	struct be_port_rxf_stats_v0 *port_stats =
					&rxf_stats->port[adapter->port_num];
	struct be_drv_stats *drvs = &adapter->drv_stats;

	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
	drvs->rx_pause_frames = port_stats->rx_pause_frames;
	drvs->rx_crc_errors = port_stats->rx_crc_errors;
	drvs->rx_control_frames = port_stats->rx_control_frames;
	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
	drvs->rx_dropped_header_too_small =
		port_stats->rx_dropped_header_too_small;
	drvs->rx_address_filtered =
					port_stats->rx_address_filtered +
					port_stats->rx_vlan_filtered;
	drvs->rx_alignment_symbol_errors =
		port_stats->rx_alignment_symbol_errors;

	drvs->tx_pauseframes = port_stats->tx_pauseframes;
	drvs->tx_controlframes = port_stats->tx_controlframes;

	if (adapter->port_num)
		drvs->jabber_events = rxf_stats->port1_jabber_events;
	else
		drvs->jabber_events = rxf_stats->port0_jabber_events;
	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
	drvs->forwarded_packets = rxf_stats->forwarded_packets;
	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
}

static void populate_be_v1_stats(struct be_adapter *adapter)
{
	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
	struct be_port_rxf_stats_v1 *port_stats =
					&rxf_stats->port[adapter->port_num];
	struct be_drv_stats *drvs = &adapter->drv_stats;

	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
	drvs->rx_pause_frames = port_stats->rx_pause_frames;
	drvs->rx_crc_errors = port_stats->rx_crc_errors;
	drvs->rx_control_frames = port_stats->rx_control_frames;
	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
	drvs->rx_dropped_header_too_small =
		port_stats->rx_dropped_header_too_small;
	drvs->rx_input_fifo_overflow_drop =
		port_stats->rx_input_fifo_overflow_drop;
	drvs->rx_address_filtered = port_stats->rx_address_filtered;
	drvs->rx_alignment_symbol_errors =
		port_stats->rx_alignment_symbol_errors;
	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
	drvs->tx_pauseframes = port_stats->tx_pauseframes;
	drvs->tx_controlframes = port_stats->tx_controlframes;
	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
	drvs->jabber_events = port_stats->jabber_events;
	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
	drvs->forwarded_packets = rxf_stats->forwarded_packets;
	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
}

static void populate_be_v2_stats(struct be_adapter *adapter)
{
	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
	struct be_port_rxf_stats_v2 *port_stats =
					&rxf_stats->port[adapter->port_num];
	struct be_drv_stats *drvs = &adapter->drv_stats;

	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
	drvs->rx_pause_frames = port_stats->rx_pause_frames;
	drvs->rx_crc_errors = port_stats->rx_crc_errors;
	drvs->rx_control_frames = port_stats->rx_control_frames;
	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
	drvs->rx_dropped_header_too_small =
		port_stats->rx_dropped_header_too_small;
	drvs->rx_input_fifo_overflow_drop =
		port_stats->rx_input_fifo_overflow_drop;
	drvs->rx_address_filtered = port_stats->rx_address_filtered;
	drvs->rx_alignment_symbol_errors =
		port_stats->rx_alignment_symbol_errors;
	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
	drvs->tx_pauseframes = port_stats->tx_pauseframes;
	drvs->tx_controlframes = port_stats->tx_controlframes;
	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
	drvs->jabber_events = port_stats->jabber_events;
	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
	drvs->forwarded_packets = rxf_stats->forwarded_packets;
	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
	if (be_roce_supported(adapter)) {
		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
		drvs->rx_roce_frames = port_stats->roce_frames_received;
		drvs->roce_drops_crc = port_stats->roce_drops_crc;
		drvs->roce_drops_payload_len =
			port_stats->roce_drops_payload_len;
	}
}

static void populate_lancer_stats(struct be_adapter *adapter)
{
	struct be_drv_stats *drvs = &adapter->drv_stats;
	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);

	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
	drvs->rx_dropped_tcp_length =
				pport_stats->rx_dropped_invalid_tcp_length;
	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
	drvs->rx_dropped_header_too_small =
				pport_stats->rx_dropped_header_too_small;
	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
	drvs->rx_address_filtered =
					pport_stats->rx_address_filtered +
					pport_stats->rx_vlan_filtered;
	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
	drvs->jabber_events = pport_stats->rx_jabbers;
	drvs->forwarded_packets = pport_stats->num_forwards_lo;
	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
	drvs->rx_drops_too_many_frags =
				pport_stats->rx_drops_too_many_frags_lo;
}

560 {
561 #define lo(x)                   (x & 0xFFFF)
562 #define hi(x)                   (x & 0xFFFF0000)
563         bool wrapped = val < lo(*acc);
564         u32 newacc = hi(*acc) + val;
565
566         if (wrapped)
567                 newacc += 65536;
568         ACCESS_ONCE(*acc) = newacc;
569 }
570
static void populate_erx_stats(struct be_adapter *adapter,
			       struct be_rx_obj *rxo, u32 erx_stat)
{
	if (!BEx_chip(adapter))
		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
	else
		/* this erx HW counter can wrap around after 65535;
		 * the driver accumulates it into a 32-bit value
		 */
		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
				     (u16)erx_stat);
}

void be_parse_stats(struct be_adapter *adapter)
{
	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
	struct be_rx_obj *rxo;
	int i;
	u32 erx_stat;

	if (lancer_chip(adapter)) {
		populate_lancer_stats(adapter);
	} else {
		if (BE2_chip(adapter))
			populate_be_v0_stats(adapter);
		else if (BE3_chip(adapter))
			populate_be_v1_stats(adapter);
		else
			populate_be_v2_stats(adapter);

		/* erx_v2 is a superset of v0/v1; use v2 for v0/v1 access too */
		for_all_rx_queues(adapter, rxo, i) {
			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
			populate_erx_stats(adapter, rxo, erx_stat);
		}
	}
}

static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
						struct rtnl_link_stats64 *stats)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct be_drv_stats *drvs = &adapter->drv_stats;
	struct be_rx_obj *rxo;
	struct be_tx_obj *txo;
	u64 pkts, bytes;
	unsigned int start;
	int i;

	for_all_rx_queues(adapter, rxo, i) {
		const struct be_rx_stats *rx_stats = rx_stats(rxo);

		do {
			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
			pkts = rx_stats(rxo)->rx_pkts;
			bytes = rx_stats(rxo)->rx_bytes;
		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
		stats->rx_packets += pkts;
		stats->rx_bytes += bytes;
		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
					rx_stats(rxo)->rx_drops_no_frags;
	}

	for_all_tx_queues(adapter, txo, i) {
		const struct be_tx_stats *tx_stats = tx_stats(txo);

		do {
			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
			pkts = tx_stats(txo)->tx_pkts;
			bytes = tx_stats(txo)->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
		stats->tx_packets += pkts;
		stats->tx_bytes += bytes;
	}

	/* bad pkts received */
	stats->rx_errors = drvs->rx_crc_errors +
		drvs->rx_alignment_symbol_errors +
		drvs->rx_in_range_errors +
		drvs->rx_out_range_errors +
		drvs->rx_frame_too_long +
		drvs->rx_dropped_too_small +
		drvs->rx_dropped_too_short +
		drvs->rx_dropped_header_too_small +
		drvs->rx_dropped_tcp_length +
		drvs->rx_dropped_runt;

	/* detailed rx errors */
	stats->rx_length_errors = drvs->rx_in_range_errors +
		drvs->rx_out_range_errors +
		drvs->rx_frame_too_long;

	stats->rx_crc_errors = drvs->rx_crc_errors;

	/* frame alignment errors */
	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;

	/* receiver fifo overrun */
	/* drops_no_pbuf is not per i/f; it's per BE card */
	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
				drvs->rx_input_fifo_overflow_drop +
				drvs->rx_drops_no_pbuf;
	return stats;
}

void be_link_status_update(struct be_adapter *adapter, u8 link_status)
{
	struct net_device *netdev = adapter->netdev;

	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
		netif_carrier_off(netdev);
		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
	}

	if (link_status)
		netif_carrier_on(netdev);
	else
		netif_carrier_off(netdev);

	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
}

static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
{
	struct be_tx_stats *stats = tx_stats(txo);
	u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;

	u64_stats_update_begin(&stats->sync);
	stats->tx_reqs++;
	stats->tx_bytes += skb->len;
	stats->tx_pkts += tx_pkts;
	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
		stats->tx_vxlan_offload_pkts += tx_pkts;
	u64_stats_update_end(&stats->sync);
}

/* Returns number of WRBs needed for the skb */
static u32 skb_wrb_cnt(struct sk_buff *skb)
{
	/* +1 for the header wrb */
	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
}

static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
{
	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
	wrb->rsvd0 = 0;
}

/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 * to avoid the swap and shift/mask operations in wrb_fill().
 */
static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
{
	wrb->frag_pa_hi = 0;
	wrb->frag_pa_lo = 0;
	wrb->frag_len = 0;
	wrb->rsvd0 = 0;
}

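/* Returns the VLAN tag to place in the Tx WRB.  If the priority in the
 * skb's tag is not in the adapter's allowed priority bitmap, remap it to
 * the FW-recommended priority bits.
 */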
static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
				     struct sk_buff *skb)
{
	u8 vlan_prio;
	u16 vlan_tag;

	vlan_tag = skb_vlan_tag_get(skb);
	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
	/* If vlan priority provided by OS is NOT in available bmap */
	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
				adapter->recommended_prio_bits;

	return vlan_tag;
}

/* Used only for IP tunnel packets */
static u16 skb_inner_ip_proto(struct sk_buff *skb)
{
	return (inner_ip_hdr(skb)->version == 4) ?
		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
}

static u16 skb_ip_proto(struct sk_buff *skb)
{
	return (ip_hdr(skb)->version == 4) ?
		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
}

static inline bool be_is_txq_full(struct be_tx_obj *txo)
{
	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
}

static inline bool be_can_txq_wake(struct be_tx_obj *txo)
{
	return atomic_read(&txo->q.used) < txo->q.len / 2;
}

static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
{
	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
}

static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
				       struct sk_buff *skb,
				       struct be_wrb_params *wrb_params)
{
	u16 proto;

	if (skb_is_gso(skb)) {
		BE_WRB_F_SET(wrb_params->features, LSO, 1);
		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (skb->encapsulation) {
			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
			proto = skb_inner_ip_proto(skb);
		} else {
			proto = skb_ip_proto(skb);
		}
		if (proto == IPPROTO_TCP)
			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
		else if (proto == IPPROTO_UDP)
			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
	}

	if (skb_vlan_tag_present(skb)) {
		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
	}

	BE_WRB_F_SET(wrb_params->features, CRC, 1);
}

static void wrb_fill_hdr(struct be_adapter *adapter,
			 struct be_eth_hdr_wrb *hdr,
			 struct be_wrb_params *wrb_params,
			 struct sk_buff *skb)
{
	memset(hdr, 0, sizeof(*hdr));

	SET_TX_WRB_HDR_BITS(crc, hdr,
			    BE_WRB_F_GET(wrb_params->features, CRC));
	SET_TX_WRB_HDR_BITS(ipcs, hdr,
			    BE_WRB_F_GET(wrb_params->features, IPCS));
	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
			    BE_WRB_F_GET(wrb_params->features, TCPCS));
	SET_TX_WRB_HDR_BITS(udpcs, hdr,
			    BE_WRB_F_GET(wrb_params->features, UDPCS));

	SET_TX_WRB_HDR_BITS(lso, hdr,
			    BE_WRB_F_GET(wrb_params->features, LSO));
	SET_TX_WRB_HDR_BITS(lso6, hdr,
			    BE_WRB_F_GET(wrb_params->features, LSO6));
	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);

	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
	 * hack is not needed, the evt bit is set while ringing DB.
	 */
	SET_TX_WRB_HDR_BITS(event, hdr,
			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
	SET_TX_WRB_HDR_BITS(vlan, hdr,
			    BE_WRB_F_GET(wrb_params->features, VLAN));
	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);

	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
	SET_TX_WRB_HDR_BITS(mgmt, hdr,
			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
}

static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
			  bool unmap_single)
{
	dma_addr_t dma;
	u32 frag_len = le32_to_cpu(wrb->frag_len);

	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
		(u64)le32_to_cpu(wrb->frag_pa_lo);
	if (frag_len) {
		if (unmap_single)
			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
		else
			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
	}
}

/* Grab a WRB header for xmit */
static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
{
	u32 head = txo->q.head;

	queue_head_inc(&txo->q);
	return head;
}

/* Set up the WRB header for xmit */
static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
				struct be_tx_obj *txo,
				struct be_wrb_params *wrb_params,
				struct sk_buff *skb, u16 head)
{
	u32 num_frags = skb_wrb_cnt(skb);
	struct be_queue_info *txq = &txo->q;
	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);

	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
	be_dws_cpu_to_le(hdr, sizeof(*hdr));

	BUG_ON(txo->sent_skb_list[head]);
	txo->sent_skb_list[head] = skb;
	txo->last_req_hdr = head;
	atomic_add(num_frags, &txq->used);
	txo->last_req_wrb_cnt = num_frags;
	txo->pend_wrb_cnt += num_frags;
}

/* Setup a WRB fragment (buffer descriptor) for xmit */
static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
				 int len)
{
	struct be_eth_wrb *wrb;
	struct be_queue_info *txq = &txo->q;

	wrb = queue_head_node(txq);
	wrb_fill(wrb, busaddr, len);
	queue_head_inc(txq);
}

/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 * was invoked. The producer index is restored to the previous packet and the
 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 */
static void be_xmit_restore(struct be_adapter *adapter,
			    struct be_tx_obj *txo, u32 head, bool map_single,
			    u32 copied)
{
	struct device *dev;
	struct be_eth_wrb *wrb;
	struct be_queue_info *txq = &txo->q;

	dev = &adapter->pdev->dev;
	txq->head = head;

	/* skip the first wrb (hdr); it's not mapped */
	queue_head_inc(txq);
	while (copied) {
		wrb = queue_head_node(txq);
		unmap_tx_frag(dev, wrb, map_single);
		map_single = false;
		copied -= le32_to_cpu(wrb->frag_len);
		queue_head_inc(txq);
	}

	txq->head = head;
}

/* Enqueue the given packet for transmit. This routine allocates WRBs for the
 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
 * of WRBs used up by the packet.
 */
static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
			   struct sk_buff *skb,
			   struct be_wrb_params *wrb_params)
{
	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
	struct device *dev = &adapter->pdev->dev;
	struct be_queue_info *txq = &txo->q;
	bool map_single = false;
	u32 head = txq->head;
	dma_addr_t busaddr;
	int len;

	head = be_tx_get_wrb_hdr(txo);

	if (skb->len > skb->data_len) {
		len = skb_headlen(skb);

		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, busaddr))
			goto dma_err;
		map_single = true;
		be_tx_setup_wrb_frag(txo, busaddr, len);
		copied += len;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
		len = skb_frag_size(frag);

		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, busaddr))
			goto dma_err;
		be_tx_setup_wrb_frag(txo, busaddr, len);
		copied += len;
	}

	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);

	be_tx_stats_update(txo, skb);
	return wrb_cnt;

dma_err:
	adapter->drv_stats.dma_map_errors++;
	be_xmit_restore(adapter, txo, head, map_single, copied);
	return 0;
}

static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
{
	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
}

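/* Insert the VLAN tag(s) directly into the packet data instead of relying
 * on HW tagging; used by the qnq and HW-tagging workarounds below.
 */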
static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
					     struct sk_buff *skb,
					     struct be_wrb_params
					     *wrb_params)
{
	u16 vlan_tag = 0;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		return skb;

	if (skb_vlan_tag_present(skb))
		vlan_tag = be_get_tx_vlan_tag(adapter, skb);

	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
		if (!vlan_tag)
			vlan_tag = adapter->pvid;
		/* F/W workaround: setting skip_hw_vlan = 1 informs the F/W
		 * to skip VLAN insertion
		 */
		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
	}

	if (vlan_tag) {
		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
						vlan_tag);
		if (unlikely(!skb))
			return skb;
		skb->vlan_tci = 0;
	}

	/* Insert the outer VLAN, if any */
	if (adapter->qnq_vid) {
		vlan_tag = adapter->qnq_vid;
		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
						vlan_tag);
		if (unlikely(!skb))
			return skb;
		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
	}

	return skb;
}

static bool be_ipv6_exthdr_check(struct sk_buff *skb)
{
	struct ethhdr *eh = (struct ethhdr *)skb->data;
	u16 offset = ETH_HLEN;

	if (eh->h_proto == htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);

		offset += sizeof(struct ipv6hdr);
		if (ip6h->nexthdr != NEXTHDR_TCP &&
		    ip6h->nexthdr != NEXTHDR_UDP) {
			struct ipv6_opt_hdr *ehdr =
				(struct ipv6_opt_hdr *)(skb->data + offset);

			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
			if (ehdr->hdrlen == 0xff)
				return true;
		}
	}
	return false;
}

static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
{
	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
}

static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
{
	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
}

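/* Tx-path workarounds for HW bugs on BEx/Lancer chips.  May modify or drop
 * the skb; returns NULL if the skb was dropped or a fixup failed.
 */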
static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
						  struct sk_buff *skb,
						  struct be_wrb_params
						  *wrb_params)
{
	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
	unsigned int eth_hdr_len;
	struct iphdr *ip;

	/* For padded packets, BE HW modifies the tot_len field in the IP
	 * header incorrectly when a VLAN tag is inserted by HW.
	 * For padded packets, Lancer computes an incorrect checksum.
	 */
	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
						VLAN_ETH_HLEN : ETH_HLEN;
	if (skb->len <= 60 &&
	    (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
	    is_ipv4_pkt(skb)) {
		ip = (struct iphdr *)ip_hdr(skb);
		pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
	}

	/* If the vlan tag is already inlined in the packet, skip HW VLAN
	 * tagging in pvid-tagging mode
	 */
	if (be_pvid_tagging_enabled(adapter) &&
	    veh->h_vlan_proto == htons(ETH_P_8021Q))
		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);

	/* HW has a bug wherein it will calculate the CSUM for VLAN
	 * pkts even when checksum offload was not requested.
	 * Manually insert the VLAN in the pkt to avoid this.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL &&
	    skb_vlan_tag_present(skb)) {
		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
		if (unlikely(!skb))
			goto err;
	}

	/* HW may lockup when VLAN HW tagging is requested on
	 * certain ipv6 packets. Drop such pkts if the HW workaround to
	 * skip HW tagging is not enabled by FW.
	 */
	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
		     (adapter->pvid || adapter->qnq_vid) &&
		     !qnq_async_evt_rcvd(adapter)))
		goto tx_drop;

	/* Manual VLAN tag insertion to prevent:
	 * ASIC lockup when the ASIC inserts VLAN tag into
	 * certain ipv6 packets. Insert VLAN tags in driver,
	 * and set event, completion, vlan bits accordingly
	 * in the Tx WRB.
	 */
	if (be_ipv6_tx_stall_chk(adapter, skb) &&
	    be_vlan_tag_tx_chk(adapter, skb)) {
		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
		if (unlikely(!skb))
			goto err;
	}

	return skb;
tx_drop:
	dev_kfree_skb_any(skb);
err:
	return NULL;
}

static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
					   struct sk_buff *skb,
					   struct be_wrb_params *wrb_params)
{
	int err;

	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
	 * packets that are 32 bytes or less may cause a transmit stall
	 * on that port. The workaround is to pad such packets
	 * (len <= 32 bytes) to a minimum length of 36 bytes.
	 */
	if (skb->len <= 32) {
		if (skb_put_padto(skb, 36))
			return NULL;
	}

	if (BEx_chip(adapter) || lancer_chip(adapter)) {
		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
		if (!skb)
			return NULL;
	}

	/* The stack can send us skbs with length greater than
	 * what the HW can handle. Trim the extra bytes.
	 */
	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
	WARN_ON(err);

	return skb;
}

static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
{
	struct be_queue_info *txq = &txo->q;
	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);

	/* Mark the last request eventable if it hasn't been marked already */
	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);

	/* compose a dummy wrb if an odd number of wrbs must be notified */
	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
		wrb_fill_dummy(queue_head_node(txq));
		queue_head_inc(txq);
		atomic_inc(&txq->used);
		txo->pend_wrb_cnt++;
		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
					   TX_HDR_WRB_NUM_SHIFT);
		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
					  TX_HDR_WRB_NUM_SHIFT);
	}
	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
	txo->pend_wrb_cnt = 0;
}

/* OS2BMC related */

#define DHCP_CLIENT_PORT	68
#define DHCP_SERVER_PORT	67
#define NET_BIOS_PORT1		137
#define NET_BIOS_PORT2		138
#define DHCPV6_RAS_PORT		547

#define is_mc_allowed_on_bmc(adapter, eh)	\
	(!is_multicast_filt_enabled(adapter) && \
	 is_multicast_ether_addr(eh->h_dest) && \
	 !is_broadcast_ether_addr(eh->h_dest))

#define is_bc_allowed_on_bmc(adapter, eh)	\
	(!is_broadcast_filt_enabled(adapter) && \
	 is_broadcast_ether_addr(eh->h_dest))

#define is_arp_allowed_on_bmc(adapter, skb)	\
	(is_arp(skb) && is_arp_filt_enabled(adapter))

#define is_broadcast_packet(eh, adapter)	\
		(is_multicast_ether_addr(eh->h_dest) && \
		!compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))

#define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))

#define is_arp_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))

#define is_dhcp_client_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)

#define is_dhcp_srvr_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)

#define is_nbios_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)

#define is_ipv6_na_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask &	\
			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)

#define is_ipv6_ra_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)

#define is_ipv6_ras_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)

#define is_broadcast_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)

#define is_multicast_filt_enabled(adapter)	\
		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)

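/* Decide whether a copy of this Tx packet should also be sent to the BMC,
 * based on the packet type and the FW-provided BMC filtering mask.
 */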
static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
			       struct sk_buff **skb)
{
	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
	bool os2bmc = false;

	if (!be_is_os2bmc_enabled(adapter))
		goto done;

	if (!is_multicast_ether_addr(eh->h_dest))
		goto done;

	if (is_mc_allowed_on_bmc(adapter, eh) ||
	    is_bc_allowed_on_bmc(adapter, eh) ||
	    is_arp_allowed_on_bmc(adapter, (*skb))) {
		os2bmc = true;
		goto done;
	}

	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *hdr = ipv6_hdr((*skb));
		u8 nexthdr = hdr->nexthdr;

		if (nexthdr == IPPROTO_ICMPV6) {
			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));

			switch (icmp6->icmp6_type) {
			case NDISC_ROUTER_ADVERTISEMENT:
				os2bmc = is_ipv6_ra_filt_enabled(adapter);
				goto done;
			case NDISC_NEIGHBOUR_ADVERTISEMENT:
				os2bmc = is_ipv6_na_filt_enabled(adapter);
				goto done;
			default:
				break;
			}
		}
	}

	if (is_udp_pkt((*skb))) {
		struct udphdr *udp = udp_hdr((*skb));

		switch (ntohs(udp->dest)) {
		case DHCP_CLIENT_PORT:
			os2bmc = is_dhcp_client_filt_enabled(adapter);
			goto done;
		case DHCP_SERVER_PORT:
			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
			goto done;
		case NET_BIOS_PORT1:
		case NET_BIOS_PORT2:
			os2bmc = is_nbios_filt_enabled(adapter);
			goto done;
		case DHCPV6_RAS_PORT:
			os2bmc = is_ipv6_ras_filt_enabled(adapter);
			goto done;
		default:
			break;
		}
	}
done:
	/* For VLAN packets destined to the BMC, the ASIC expects the VLAN
	 * tag to be inline in the packet.
	 */
	if (os2bmc)
		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);

	return os2bmc;
}

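/* Main transmit handler: apply HW workarounds, map and enqueue the skb,
 * optionally mirror it to the BMC, and ring the doorbell when no more
 * packets are pending (xmit_more unset) or the queue becomes full.
 */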
static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	u16 q_idx = skb_get_queue_mapping(skb);
	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
	struct be_wrb_params wrb_params = { 0 };
	bool flush = !skb->xmit_more;
	u16 wrb_cnt;

	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
	if (unlikely(!skb))
		goto drop;

	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);

	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
	if (unlikely(!wrb_cnt)) {
		dev_kfree_skb_any(skb);
		goto drop;
	}

	/* if os2bmc is enabled and if the pkt is destined to bmc,
	 * enqueue the pkt a 2nd time with mgmt bit set.
	 */
	if (be_send_pkt_to_bmc(adapter, &skb)) {
		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
		if (unlikely(!wrb_cnt))
			goto drop;
		else
			skb_get(skb);
	}

	if (be_is_txq_full(txo)) {
		netif_stop_subqueue(netdev, q_idx);
		tx_stats(txo)->tx_stops++;
	}

	if (flush || __netif_subqueue_stopped(netdev, q_idx))
		be_xmit_flush(adapter, txo);

	return NETDEV_TX_OK;
drop:
	tx_stats(txo)->tx_drv_drops++;
	/* Flush the already enqueued tx requests */
	if (flush && txo->pend_wrb_cnt)
		be_xmit_flush(adapter, txo);

	return NETDEV_TX_OK;
}

static int be_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct be_adapter *adapter = netdev_priv(netdev);
	struct device *dev = &adapter->pdev->dev;

	if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
		dev_info(dev, "MTU must be between %d and %d bytes\n",
			 BE_MIN_MTU, BE_MAX_MTU);
		return -EINVAL;
	}

	dev_info(dev, "MTU changed from %d to %d bytes\n",
		 netdev->mtu, new_mtu);
	netdev->mtu = new_mtu;
	return 0;
}

1384 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1385 {
1386         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1387                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1388 }
1389
1390 static int be_set_vlan_promisc(struct be_adapter *adapter)
1391 {
1392         struct device *dev = &adapter->pdev->dev;
1393         int status;
1394
1395         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1396                 return 0;
1397
1398         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1399         if (!status) {
1400                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1401                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1402         } else {
1403                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1404         }
1405         return status;
1406 }
1407
1408 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1409 {
1410         struct device *dev = &adapter->pdev->dev;
1411         int status;
1412
1413         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1414         if (!status) {
1415                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1416                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1417         }
1418         return status;
1419 }
1420
1421 /*
1422  * A max of 64 (BE_NUM_VLANS_SUPPORTED) VLANs can be configured in BE.
1423  * If the user configures more, place BE in VLAN promiscuous mode.
1424  */
1425 static int be_vid_config(struct be_adapter *adapter)
1426 {
1427         struct device *dev = &adapter->pdev->dev;
1428         u16 vids[BE_NUM_VLANS_SUPPORTED];
1429         u16 num = 0, i = 0;
1430         int status = 0;
1431
1432         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1433         if (adapter->netdev->flags & IFF_PROMISC)
1434                 return 0;
1435
1436         if (adapter->vlans_added > be_max_vlans(adapter))
1437                 return be_set_vlan_promisc(adapter);
1438
1439         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1440                 status = be_clear_vlan_promisc(adapter);
1441                 if (status)
1442                         return status;
1443         }
1444         /* Construct VLAN Table to give to HW */
1445         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1446                 vids[num++] = cpu_to_le16(i);
1447
1448         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1449         if (status) {
1450                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1451                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1452                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1453                     addl_status(status) ==
1454                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1455                         return be_set_vlan_promisc(adapter);
1456         }
1457         return status;
1458 }
1459
1460 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1461 {
1462         struct be_adapter *adapter = netdev_priv(netdev);
1463         int status = 0;
1464
1465         mutex_lock(&adapter->rx_filter_lock);
1466
1467         /* Packets with VID 0 are always received by Lancer by default */
1468         if (lancer_chip(adapter) && vid == 0)
1469                 goto done;
1470
1471         if (test_bit(vid, adapter->vids))
1472                 goto done;
1473
1474         set_bit(vid, adapter->vids);
1475         adapter->vlans_added++;
1476
1477         status = be_vid_config(adapter);
1478 done:
1479         mutex_unlock(&adapter->rx_filter_lock);
1480         return status;
1481 }
1482
1483 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1484 {
1485         struct be_adapter *adapter = netdev_priv(netdev);
1486         int status = 0;
1487
1488         mutex_lock(&adapter->rx_filter_lock);
1489
1490         /* Packets with VID 0 are always received by Lancer by default */
1491         if (lancer_chip(adapter) && vid == 0)
1492                 goto done;
1493
1494         if (!test_bit(vid, adapter->vids))
1495                 goto done;
1496
1497         clear_bit(vid, adapter->vids);
1498         adapter->vlans_added--;
1499
1500         status = be_vid_config(adapter);
1501 done:
1502         mutex_unlock(&adapter->rx_filter_lock);
1503         return status;
1504 }
1505
1506 static void be_set_all_promisc(struct be_adapter *adapter)
1507 {
1508         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1509         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1510 }
1511
1512 static void be_set_mc_promisc(struct be_adapter *adapter)
1513 {
1514         int status;
1515
1516         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1517                 return;
1518
1519         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1520         if (!status)
1521                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1522 }
1523
1524 static void be_set_uc_promisc(struct be_adapter *adapter)
1525 {
1526         int status;
1527
1528         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1529                 return;
1530
1531         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1532         if (!status)
1533                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1534 }
1535
1536 static void be_clear_uc_promisc(struct be_adapter *adapter)
1537 {
1538         int status;
1539
1540         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1541                 return;
1542
1543         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1544         if (!status)
1545                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1546 }
1547
1548 /* The below 2 functions are the callback args for __dev_mc_sync()/__dev_uc_sync().
1549  * We use a single callback for both sync and unsync. We don't actually
1550  * add/remove addresses through this callback; we only use it to detect changes
1551  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1552  */
1553 static int be_uc_list_update(struct net_device *netdev,
1554                              const unsigned char *addr)
1555 {
1556         struct be_adapter *adapter = netdev_priv(netdev);
1557
1558         adapter->update_uc_list = true;
1559         return 0;
1560 }
1561
1562 static int be_mc_list_update(struct net_device *netdev,
1563                              const unsigned char *addr)
1564 {
1565         struct be_adapter *adapter = netdev_priv(netdev);
1566
1567         adapter->update_mc_list = true;
1568         return 0;
1569 }
1570
1571 static void be_set_mc_list(struct be_adapter *adapter)
1572 {
1573         struct net_device *netdev = adapter->netdev;
1574         struct netdev_hw_addr *ha;
1575         bool mc_promisc = false;
1576         int status;
1577
1578         netif_addr_lock_bh(netdev);
1579         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1580
1581         if (netdev->flags & IFF_PROMISC) {
1582                 adapter->update_mc_list = false;
1583         } else if (netdev->flags & IFF_ALLMULTI ||
1584                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1585                 /* Enable multicast promisc if num configured exceeds
1586                  * what we support
1587                  */
1588                 mc_promisc = true;
1589                 adapter->update_mc_list = false;
1590         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1591                 /* Update mc-list unconditionally if the iface was previously
1592                  * in mc-promisc mode and now is out of that mode.
1593                  */
1594                 adapter->update_mc_list = true;
1595         }
1596
1597         if (adapter->update_mc_list) {
1598                 int i = 0;
1599
1600                 /* cache the mc-list in adapter */
1601                 netdev_for_each_mc_addr(ha, netdev) {
1602                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1603                         i++;
1604                 }
1605                 adapter->mc_count = netdev_mc_count(netdev);
1606         }
1607         netif_addr_unlock_bh(netdev);
1608
1609         if (mc_promisc) {
1610                 be_set_mc_promisc(adapter);
1611         } else if (adapter->update_mc_list) {
1612                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1613                 if (!status)
1614                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1615                 else
1616                         be_set_mc_promisc(adapter);
1617
1618                 adapter->update_mc_list = false;
1619         }
1620 }
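
     /* Design note: the mc-list is snapshotted into adapter->mc_list under
      * netif_addr_lock_bh(), but the FW filter cmds are issued only after
      * the lock is dropped, as the FW cmd path may sleep.
      */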
1621
1622 static void be_clear_mc_list(struct be_adapter *adapter)
1623 {
1624         struct net_device *netdev = adapter->netdev;
1625
1626         __dev_mc_unsync(netdev, NULL);
1627         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1628         adapter->mc_count = 0;
1629 }
1630
1631 static void be_set_uc_list(struct be_adapter *adapter)
1632 {
1633         struct net_device *netdev = adapter->netdev;
1634         struct netdev_hw_addr *ha;
1635         bool uc_promisc = false;
1636         int curr_uc_macs = 0, i;
1637
1638         netif_addr_lock_bh(netdev);
1639         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1640
1641         if (netdev->flags & IFF_PROMISC) {
1642                 adapter->update_uc_list = false;
1643         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1644                 uc_promisc = true;
1645                 adapter->update_uc_list = false;
1646         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1647                 /* Update uc-list unconditionally if the iface was previously
1648                  * in uc-promisc mode and now is out of that mode.
1649                  */
1650                 adapter->update_uc_list = true;
1651         }
1652
1653         if (adapter->update_uc_list) {
1654                 i = 0;
1655
1656                 /* cache the uc-list in adapter array */
1657                 netdev_for_each_uc_addr(ha, netdev) {
1658                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1659                         i++;
1660                 }
1661                 curr_uc_macs = netdev_uc_count(netdev);
1662         }
1663         netif_addr_unlock_bh(netdev);
1664
1665         if (uc_promisc) {
1666                 be_set_uc_promisc(adapter);
1667         } else if (adapter->update_uc_list) {
1668                 be_clear_uc_promisc(adapter);
1669
1670                 for (i = 0; i < adapter->uc_macs; i++)
1671                         be_cmd_pmac_del(adapter, adapter->if_handle,
1672                                         adapter->pmac_id[i + 1], 0);
1673
1674                 for (i = 0; i < curr_uc_macs; i++)
1675                         be_cmd_pmac_add(adapter, adapter->uc_list[i].mac,
1676                                         adapter->if_handle,
1677                                         &adapter->pmac_id[i + 1], 0);
1678                 adapter->uc_macs = curr_uc_macs;
1679                 adapter->update_uc_list = false;
1680         }
1681 }
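
     /* pmac_id[0] holds the primary MAC programmed at interface-create
      * time, which is why only be_max_uc() - 1 slots are usable above and
      * the uc-list is programmed into (and deleted from) pmac_id slots
      * starting at index 1.
      */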
1682
1683 static void be_clear_uc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         int i;
1687
1688         __dev_uc_unsync(netdev, NULL);
1689         for (i = 0; i < adapter->uc_macs; i++)
1690                 be_cmd_pmac_del(adapter, adapter->if_handle,
1691                                 adapter->pmac_id[i + 1], 0);
1692         adapter->uc_macs = 0;
1693 }
1694
1695 static void __be_set_rx_mode(struct be_adapter *adapter)
1696 {
1697         struct net_device *netdev = adapter->netdev;
1698
1699         mutex_lock(&adapter->rx_filter_lock);
1700
1701         if (netdev->flags & IFF_PROMISC) {
1702                 if (!be_in_all_promisc(adapter))
1703                         be_set_all_promisc(adapter);
1704         } else if (be_in_all_promisc(adapter)) {
1705                 /* We need to re-program the vlan-list or clear
1706                  * vlan-promisc mode (if needed) when the interface
1707                  * comes out of promisc mode.
1708                  */
1709                 be_vid_config(adapter);
1710         }
1711
1712         be_set_uc_list(adapter);
1713         be_set_mc_list(adapter);
1714
1715         mutex_unlock(&adapter->rx_filter_lock);
1716 }
1717
1718 static void be_work_set_rx_mode(struct work_struct *work)
1719 {
1720         struct be_cmd_work *cmd_work =
1721                                 container_of(work, struct be_cmd_work, work);
1722
1723         __be_set_rx_mode(cmd_work->adapter);
1724         kfree(cmd_work);
1725 }
1726
1727 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1728 {
1729         struct be_adapter *adapter = netdev_priv(netdev);
1730         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1731         int status;
1732
1733         if (!sriov_enabled(adapter))
1734                 return -EPERM;
1735
1736         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1737                 return -EINVAL;
1738
1739         /* Proceed further only if the user-provided MAC is different
1740          * from the active MAC
1741          */
1742         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1743                 return 0;
1744
1745         if (BEx_chip(adapter)) {
1746                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1747                                 vf + 1);
1748
1749                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1750                                          &vf_cfg->pmac_id, vf + 1);
1751         } else {
1752                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1753                                         vf + 1);
1754         }
1755
1756         if (status) {
1757                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1758                         mac, vf, status);
1759                 return be_cmd_status(status);
1760         }
1761
1762         ether_addr_copy(vf_cfg->mac_addr, mac);
1763
1764         return 0;
1765 }
1766
1767 static int be_get_vf_config(struct net_device *netdev, int vf,
1768                             struct ifla_vf_info *vi)
1769 {
1770         struct be_adapter *adapter = netdev_priv(netdev);
1771         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1772
1773         if (!sriov_enabled(adapter))
1774                 return -EPERM;
1775
1776         if (vf >= adapter->num_vfs)
1777                 return -EINVAL;
1778
1779         vi->vf = vf;
1780         vi->max_tx_rate = vf_cfg->tx_rate;
1781         vi->min_tx_rate = 0;
1782         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1783         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1784         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1785         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1786         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1787
1788         return 0;
1789 }
1790
1791 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1792 {
1793         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1794         u16 vids[BE_NUM_VLANS_SUPPORTED];
1795         int vf_if_id = vf_cfg->if_handle;
1796         int status;
1797
1798         /* Enable Transparent VLAN Tagging */
1799         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1800         if (status)
1801                 return status;
1802
1803         /* With TVT enabled, clear any VLAN filters pre-programmed on the VF */
1804         vids[0] = 0;
1805         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1806         if (!status)
1807                 dev_info(&adapter->pdev->dev,
1808                          "Cleared guest VLANs on VF%d", vf);
1809
1810         /* With TVT enabled, disallow the VF from programming VLAN filters */
1811         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1812                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1813                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1814                 if (!status)
1815                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1816         }
1817         return 0;
1818 }
1819
1820 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1821 {
1822         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1823         struct device *dev = &adapter->pdev->dev;
1824         int status;
1825
1826         /* Reset Transparent VLAN Tagging. */
1827         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1828                                        vf_cfg->if_handle, 0, 0);
1829         if (status)
1830                 return status;
1831
1832         /* Allow the VF to program VLAN filters again */
1833         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1834                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1835                                                   BE_PRIV_FILTMGMT, vf + 1);
1836                 if (!status) {
1837                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1838                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1839                 }
1840         }
1841
1842         dev_info(dev,
1843                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1844         return 0;
1845 }
1846
1847 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1848 {
1849         struct be_adapter *adapter = netdev_priv(netdev);
1850         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1851         int status;
1852
1853         if (!sriov_enabled(adapter))
1854                 return -EPERM;
1855
1856         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1857                 return -EINVAL;
1858
1859         if (vlan || qos) {
1860                 vlan |= qos << VLAN_PRIO_SHIFT;
1861                 status = be_set_vf_tvt(adapter, vf, vlan);
1862         } else {
1863                 status = be_clear_vf_tvt(adapter, vf);
1864         }
1865
1866         if (status) {
1867                 dev_err(&adapter->pdev->dev,
1868                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1869                         status);
1870                 return be_cmd_status(status);
1871         }
1872
1873         vf_cfg->vlan_tag = vlan;
1874         return 0;
1875 }
1876
1877 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1878                              int min_tx_rate, int max_tx_rate)
1879 {
1880         struct be_adapter *adapter = netdev_priv(netdev);
1881         struct device *dev = &adapter->pdev->dev;
1882         int percent_rate, status = 0;
1883         u16 link_speed = 0;
1884         u8 link_status;
1885
1886         if (!sriov_enabled(adapter))
1887                 return -EPERM;
1888
1889         if (vf >= adapter->num_vfs)
1890                 return -EINVAL;
1891
1892         if (min_tx_rate)
1893                 return -EINVAL;
1894
1895         if (!max_tx_rate)
1896                 goto config_qos;
1897
1898         status = be_cmd_link_status_query(adapter, &link_speed,
1899                                           &link_status, 0);
1900         if (status)
1901                 goto err;
1902
1903         if (!link_status) {
1904                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1905                 status = -ENETDOWN;
1906                 goto err;
1907         }
1908
1909         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1910                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1911                         link_speed);
1912                 status = -EINVAL;
1913                 goto err;
1914         }
1915
1916         /* On Skyhawk the QoS rate must be a multiple of 1% of the link speed */
1917         percent_rate = link_speed / 100;
1918         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1919                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1920                         percent_rate);
1921                 status = -EINVAL;
1922                 goto err;
1923         }
1924
1925 config_qos:
1926         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1927         if (status)
1928                 goto err;
1929
1930         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1931         return 0;
1932
1933 err:
1934         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1935                 max_tx_rate, vf);
1936         return be_cmd_status(status);
1937 }
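
     /* Worked example for the Skyhawk check above: on a 10Gbps link,
      * link_speed = 10000 so percent_rate = 100, and any max_tx_rate that
      * is not a multiple of 100Mbps (1% of the link speed) is rejected.
      */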
1938
1939 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1940                                 int link_state)
1941 {
1942         struct be_adapter *adapter = netdev_priv(netdev);
1943         int status;
1944
1945         if (!sriov_enabled(adapter))
1946                 return -EPERM;
1947
1948         if (vf >= adapter->num_vfs)
1949                 return -EINVAL;
1950
1951         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1952         if (status) {
1953                 dev_err(&adapter->pdev->dev,
1954                         "Link state change on VF %d failed: %#x\n", vf, status);
1955                 return be_cmd_status(status);
1956         }
1957
1958         adapter->vf_cfg[vf].plink_tracking = link_state;
1959
1960         return 0;
1961 }
1962
1963 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
1964 {
1965         struct be_adapter *adapter = netdev_priv(netdev);
1966         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1967         u8 spoofchk;
1968         int status;
1969
1970         if (!sriov_enabled(adapter))
1971                 return -EPERM;
1972
1973         if (vf >= adapter->num_vfs)
1974                 return -EINVAL;
1975
1976         if (BEx_chip(adapter))
1977                 return -EOPNOTSUPP;
1978
1979         if (enable == vf_cfg->spoofchk)
1980                 return 0;
1981
1982         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
1983
1984         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
1985                                        0, spoofchk);
1986         if (status) {
1987                 dev_err(&adapter->pdev->dev,
1988                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
1989                 return be_cmd_status(status);
1990         }
1991
1992         vf_cfg->spoofchk = enable;
1993         return 0;
1994 }
1995
1996 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
1997                           ulong now)
1998 {
1999         aic->rx_pkts_prev = rx_pkts;
2000         aic->tx_reqs_prev = tx_pkts;
2001         aic->jiffies = now;
2002 }
2003
2004 static int be_get_new_eqd(struct be_eq_obj *eqo)
2005 {
2006         struct be_adapter *adapter = eqo->adapter;
2007         int eqd, start;
2008         struct be_aic_obj *aic;
2009         struct be_rx_obj *rxo;
2010         struct be_tx_obj *txo;
2011         u64 rx_pkts = 0, tx_pkts = 0;
2012         ulong now;
2013         u32 pps, delta;
2014         int i;
2015
2016         aic = &adapter->aic_obj[eqo->idx];
2017         if (!aic->enable) {
2018                 if (aic->jiffies)
2019                         aic->jiffies = 0;
2020                 eqd = aic->et_eqd;
2021                 return eqd;
2022         }
2023
2024         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2025                 do {
2026                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2027                         rx_pkts += rxo->stats.rx_pkts;
2028                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2029         }
2030
2031         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2032                 do {
2033                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2034                         tx_pkts += txo->stats.tx_reqs;
2035                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2036         }
2037
2038         /* Skip if counters wrapped around or this is the first calculation */
2039         now = jiffies;
2040         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2041             rx_pkts < aic->rx_pkts_prev ||
2042             tx_pkts < aic->tx_reqs_prev) {
2043                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2044                 return aic->prev_eqd;
2045         }
2046
2047         delta = jiffies_to_msecs(now - aic->jiffies);
2048         if (delta == 0)
2049                 return aic->prev_eqd;
2050
2051         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2052                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2053         eqd = (pps / 15000) << 2;
2054
2055         if (eqd < 8)
2056                 eqd = 0;
2057         eqd = min_t(u32, eqd, aic->max_eqd);
2058         eqd = max_t(u32, eqd, aic->min_eqd);
2059
2060         be_aic_update(aic, rx_pkts, tx_pkts, now);
2061
2062         return eqd;
2063 }
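
     /* Worked example (illustrative numbers): 45000 rx pkts and 15000 tx
      * reqs accumulated over a 1000ms window give pps = 60000, so
      * eqd = (60000 / 15000) << 2 = 16. Values below 8 are rounded down
      * to 0 (no delay) and the result is clamped to [min_eqd, max_eqd].
      */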
2064
2065 /* For Skyhawk-R only */
2066 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2067 {
2068         struct be_adapter *adapter = eqo->adapter;
2069         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2070         ulong now = jiffies;
2071         int eqd;
2072         u32 mult_enc;
2073
2074         if (!aic->enable)
2075                 return 0;
2076
2077         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2078                 eqd = aic->prev_eqd;
2079         else
2080                 eqd = be_get_new_eqd(eqo);
2081
2082         if (eqd > 100)
2083                 mult_enc = R2I_DLY_ENC_1;
2084         else if (eqd > 60)
2085                 mult_enc = R2I_DLY_ENC_2;
2086         else if (eqd > 20)
2087                 mult_enc = R2I_DLY_ENC_3;
2088         else
2089                 mult_enc = R2I_DLY_ENC_0;
2090
2091         aic->prev_eqd = eqd;
2092
2093         return mult_enc;
2094 }
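
     /* On Skyhawk-R the delay is conveyed as one of the coarse
      * R2I_DLY_ENC_* encodings, which the caller writes along with the EQ
      * doorbell instead of issuing a MODIFY_EQ_DELAY cmd; larger computed
      * eqd values map to encodings with longer HW-defined delays.
      */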
2095
2096 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2097 {
2098         struct be_set_eqd set_eqd[MAX_EVT_QS];
2099         struct be_aic_obj *aic;
2100         struct be_eq_obj *eqo;
2101         int i, num = 0, eqd;
2102
2103         for_all_evt_queues(adapter, eqo, i) {
2104                 aic = &adapter->aic_obj[eqo->idx];
2105                 eqd = be_get_new_eqd(eqo);
2106                 if (force_update || eqd != aic->prev_eqd) {
2107                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2108                         set_eqd[num].eq_id = eqo->q.id;
2109                         aic->prev_eqd = eqd;
2110                         num++;
2111                 }
2112         }
2113
2114         if (num)
2115                 be_cmd_modify_eqd(adapter, set_eqd, num);
2116 }
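
     /* The MODIFY_EQ_DELAY cmd takes a delay multiplier rather than usecs;
      * (eqd * 65) / 100 above converts to that scale (e.g. a 96us delay
      * becomes a multiplier of 62).
      */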
2117
2118 static void be_rx_stats_update(struct be_rx_obj *rxo,
2119                                struct be_rx_compl_info *rxcp)
2120 {
2121         struct be_rx_stats *stats = rx_stats(rxo);
2122
2123         u64_stats_update_begin(&stats->sync);
2124         stats->rx_compl++;
2125         stats->rx_bytes += rxcp->pkt_size;
2126         stats->rx_pkts++;
2127         if (rxcp->tunneled)
2128                 stats->rx_vxlan_offload_pkts++;
2129         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2130                 stats->rx_mcast_pkts++;
2131         if (rxcp->err)
2132                 stats->rx_compl_err++;
2133         u64_stats_update_end(&stats->sync);
2134 }
2135
2136 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2137 {
2138         /* The L4 checksum is not reliable for non-TCP/UDP packets.
2139          * Also ignore ipcksm for IPv6 pkts.
2140          */
2141         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2142                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2143 }
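
     /* In short: CHECKSUM_UNNECESSARY is reported only for error-free
      * TCP/UDP pkts whose L4 csum passed and, for IPv4, whose IP csum
      * passed as well.
      */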
2144
2145 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2146 {
2147         struct be_adapter *adapter = rxo->adapter;
2148         struct be_rx_page_info *rx_page_info;
2149         struct be_queue_info *rxq = &rxo->q;
2150         u32 frag_idx = rxq->tail;
2151
2152         rx_page_info = &rxo->page_info_tbl[frag_idx];
2153         BUG_ON(!rx_page_info->page);
2154
2155         if (rx_page_info->last_frag) {
2156                 dma_unmap_page(&adapter->pdev->dev,
2157                                dma_unmap_addr(rx_page_info, bus),
2158                                adapter->big_page_size, DMA_FROM_DEVICE);
2159                 rx_page_info->last_frag = false;
2160         } else {
2161                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2162                                         dma_unmap_addr(rx_page_info, bus),
2163                                         rx_frag_size, DMA_FROM_DEVICE);
2164         }
2165
2166         queue_tail_inc(rxq);
2167         atomic_dec(&rxq->used);
2168         return rx_page_info;
2169 }
2170
2171 /* Throw away the data in the Rx completion */
2172 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2173                                 struct be_rx_compl_info *rxcp)
2174 {
2175         struct be_rx_page_info *page_info;
2176         u16 i, num_rcvd = rxcp->num_rcvd;
2177
2178         for (i = 0; i < num_rcvd; i++) {
2179                 page_info = get_rx_page_info(rxo);
2180                 put_page(page_info->page);
2181                 memset(page_info, 0, sizeof(*page_info));
2182         }
2183 }
2184
2185 /*
2186  * skb_fill_rx_data forms a complete skb for an ether frame
2187  * indicated by rxcp.
2188  */
2189 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2190                              struct be_rx_compl_info *rxcp)
2191 {
2192         struct be_rx_page_info *page_info;
2193         u16 i, j;
2194         u16 hdr_len, curr_frag_len, remaining;
2195         u8 *start;
2196
2197         page_info = get_rx_page_info(rxo);
2198         start = page_address(page_info->page) + page_info->page_offset;
2199         prefetch(start);
2200
2201         /* Copy data from the first descriptor of this completion */
2202         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2203
2204         skb->len = curr_frag_len;
2205         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2206                 memcpy(skb->data, start, curr_frag_len);
2207                 /* Complete packet has now been moved to data */
2208                 put_page(page_info->page);
2209                 skb->data_len = 0;
2210                 skb->tail += curr_frag_len;
2211         } else {
2212                 hdr_len = ETH_HLEN;
2213                 memcpy(skb->data, start, hdr_len);
2214                 skb_shinfo(skb)->nr_frags = 1;
2215                 skb_frag_set_page(skb, 0, page_info->page);
2216                 skb_shinfo(skb)->frags[0].page_offset =
2217                                         page_info->page_offset + hdr_len;
2218                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2219                                   curr_frag_len - hdr_len);
2220                 skb->data_len = curr_frag_len - hdr_len;
2221                 skb->truesize += rx_frag_size;
2222                 skb->tail += hdr_len;
2223         }
2224         page_info->page = NULL;
2225
2226         if (rxcp->pkt_size <= rx_frag_size) {
2227                 BUG_ON(rxcp->num_rcvd != 1);
2228                 return;
2229         }
2230
2231         /* More frags present for this completion */
2232         remaining = rxcp->pkt_size - curr_frag_len;
2233         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2234                 page_info = get_rx_page_info(rxo);
2235                 curr_frag_len = min(remaining, rx_frag_size);
2236
2237                 /* Coalesce all frags from the same physical page in one slot */
2238                 if (page_info->page_offset == 0) {
2239                         /* Fresh page */
2240                         j++;
2241                         skb_frag_set_page(skb, j, page_info->page);
2242                         skb_shinfo(skb)->frags[j].page_offset =
2243                                                         page_info->page_offset;
2244                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2245                         skb_shinfo(skb)->nr_frags++;
2246                 } else {
2247                         put_page(page_info->page);
2248                 }
2249
2250                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2251                 skb->len += curr_frag_len;
2252                 skb->data_len += curr_frag_len;
2253                 skb->truesize += rx_frag_size;
2254                 remaining -= curr_frag_len;
2255                 page_info->page = NULL;
2256         }
2257         BUG_ON(j > MAX_SKB_FRAGS);
2258 }
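
     /* Example with the default rx_frag_size of 2048 and a 3000-byte frame:
      * the first fragment contributes ETH_HLEN bytes to skb->data plus
      * (2048 - ETH_HLEN) bytes as frags[0]; the second fragment appends the
      * remaining 952 bytes, coalesced into frags[0] if it was carved from
      * the same physical page (page_offset != 0).
      */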
2259
2260 /* Process the RX completion indicated by rxcp when GRO is disabled */
2261 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2262                                 struct be_rx_compl_info *rxcp)
2263 {
2264         struct be_adapter *adapter = rxo->adapter;
2265         struct net_device *netdev = adapter->netdev;
2266         struct sk_buff *skb;
2267
2268         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2269         if (unlikely(!skb)) {
2270                 rx_stats(rxo)->rx_drops_no_skbs++;
2271                 be_rx_compl_discard(rxo, rxcp);
2272                 return;
2273         }
2274
2275         skb_fill_rx_data(rxo, skb, rxcp);
2276
2277         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2278                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2279         else
2280                 skb_checksum_none_assert(skb);
2281
2282         skb->protocol = eth_type_trans(skb, netdev);
2283         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2284         if (netdev->features & NETIF_F_RXHASH)
2285                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2286
2287         skb->csum_level = rxcp->tunneled;
2288         skb_mark_napi_id(skb, napi);
2289
2290         if (rxcp->vlanf)
2291                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2292
2293         netif_receive_skb(skb);
2294 }
2295
2296 /* Process the RX completion indicated by rxcp when GRO is enabled */
2297 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2298                                     struct napi_struct *napi,
2299                                     struct be_rx_compl_info *rxcp)
2300 {
2301         struct be_adapter *adapter = rxo->adapter;
2302         struct be_rx_page_info *page_info;
2303         struct sk_buff *skb = NULL;
2304         u16 remaining, curr_frag_len;
2305         u16 i, j;
2306
2307         skb = napi_get_frags(napi);
2308         if (!skb) {
2309                 be_rx_compl_discard(rxo, rxcp);
2310                 return;
2311         }
2312
2313         remaining = rxcp->pkt_size;
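             /* j starts at (u16)-1; the "i == 0" check below guarantees it
              * is advanced to slot 0 before frags[j] is first touched.
              */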
2314         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2315                 page_info = get_rx_page_info(rxo);
2316
2317                 curr_frag_len = min(remaining, rx_frag_size);
2318
2319                 /* Coalesce all frags from the same physical page in one slot */
2320                 if (i == 0 || page_info->page_offset == 0) {
2321                         /* First frag or Fresh page */
2322                         j++;
2323                         skb_frag_set_page(skb, j, page_info->page);
2324                         skb_shinfo(skb)->frags[j].page_offset =
2325                                                         page_info->page_offset;
2326                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2327                 } else {
2328                         put_page(page_info->page);
2329                 }
2330                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2331                 skb->truesize += rx_frag_size;
2332                 remaining -= curr_frag_len;
2333                 memset(page_info, 0, sizeof(*page_info));
2334         }
2335         BUG_ON(j > MAX_SKB_FRAGS);
2336
2337         skb_shinfo(skb)->nr_frags = j + 1;
2338         skb->len = rxcp->pkt_size;
2339         skb->data_len = rxcp->pkt_size;
2340         skb->ip_summed = CHECKSUM_UNNECESSARY;
2341         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2342         if (adapter->netdev->features & NETIF_F_RXHASH)
2343                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2344
2345         skb->csum_level = rxcp->tunneled;
2346
2347         if (rxcp->vlanf)
2348                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2349
2350         napi_gro_frags(napi);
2351 }
2352
2353 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2354                                  struct be_rx_compl_info *rxcp)
2355 {
2356         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2357         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2358         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2359         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2360         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2361         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2362         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2363         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2364         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2365         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2366         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2367         if (rxcp->vlanf) {
2368                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2369                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2370         }
2371         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2372         rxcp->tunneled =
2373                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2374 }
2375
2376 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2377                                  struct be_rx_compl_info *rxcp)
2378 {
2379         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2380         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2381         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2382         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2383         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2384         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2385         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2386         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2387         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2388         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2389         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2390         if (rxcp->vlanf) {
2391                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2392                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2393         }
2394         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2395         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2396 }
2397
2398 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2399 {
2400         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2401         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2402         struct be_adapter *adapter = rxo->adapter;
2403
2404         /* For checking the valid bit it is OK to use either definition, as
2405          * the valid bit is at the same position in both v0 and v1 Rx compl */
2406         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2407                 return NULL;
2408
2409         rmb();
2410         be_dws_le_to_cpu(compl, sizeof(*compl));
2411
2412         if (adapter->be3_native)
2413                 be_parse_rx_compl_v1(compl, rxcp);
2414         else
2415                 be_parse_rx_compl_v0(compl, rxcp);
2416
2417         if (rxcp->ip_frag)
2418                 rxcp->l4_csum = 0;
2419
2420         if (rxcp->vlanf) {
2421                 /* In QNQ modes, if qnq bit is not set, then the packet was
2422                  * tagged only with the transparent outer vlan-tag and must
2423                  * not be treated as a vlan packet by host
2424                  */
2425                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2426                         rxcp->vlanf = 0;
2427
2428                 if (!lancer_chip(adapter))
2429                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2430
2431                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2432                     !test_bit(rxcp->vlan_tag, adapter->vids))
2433                         rxcp->vlanf = 0;
2434         }
2435
2436         /* As the compl has been parsed, reset it; we won't touch it again */
2437         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2438
2439         queue_tail_inc(&rxo->cq);
2440         return rxcp;
2441 }
2442
2443 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2444 {
2445         u32 order = get_order(size);
2446
2447         if (order > 0)
2448                 gfp |= __GFP_COMP;
2449         return  alloc_pages(gfp, order);
2450 }
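
     /* __GFP_COMP is requested for multi-order allocations because the Rx
      * path attaches sub-page fragments of these pages to skbs; a compound
      * page keeps get_page()/put_page() accounting correct for the whole
      * block via its head page.
      */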
2451
2452 /*
2453  * Allocate a page, split it to fragments of size rx_frag_size and post as
2454  * receive buffers to BE
2455  */
2456 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2457 {
2458         struct be_adapter *adapter = rxo->adapter;
2459         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2460         struct be_queue_info *rxq = &rxo->q;
2461         struct page *pagep = NULL;
2462         struct device *dev = &adapter->pdev->dev;
2463         struct be_eth_rx_d *rxd;
2464         u64 page_dmaaddr = 0, frag_dmaaddr;
2465         u32 posted, page_offset = 0, notify = 0;
2466
2467         page_info = &rxo->page_info_tbl[rxq->head];
2468         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2469                 if (!pagep) {
2470                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2471                         if (unlikely(!pagep)) {
2472                                 rx_stats(rxo)->rx_post_fail++;
2473                                 break;
2474                         }
2475                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2476                                                     adapter->big_page_size,
2477                                                     DMA_FROM_DEVICE);
2478                         if (dma_mapping_error(dev, page_dmaaddr)) {
2479                                 put_page(pagep);
2480                                 pagep = NULL;
2481                                 adapter->drv_stats.dma_map_errors++;
2482                                 break;
2483                         }
2484                         page_offset = 0;
2485                 } else {
2486                         get_page(pagep);
2487                         page_offset += rx_frag_size;
2488                 }
2489                 page_info->page_offset = page_offset;
2490                 page_info->page = pagep;
2491
2492                 rxd = queue_head_node(rxq);
2493                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2494                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2495                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2496
2497                 /* Any space left in the current big page for another frag? */
2498                 if ((page_offset + rx_frag_size + rx_frag_size) >
2499                                         adapter->big_page_size) {
2500                         pagep = NULL;
2501                         page_info->last_frag = true;
2502                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2503                 } else {
2504                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2505                 }
2506
2507                 prev_page_info = page_info;
2508                 queue_head_inc(rxq);
2509                 page_info = &rxo->page_info_tbl[rxq->head];
2510         }
2511
2512         /* Mark the last frag of a page when we break out of the above loop
2513          * with no more slots available in the RXQ
2514          */
2515         if (pagep) {
2516                 prev_page_info->last_frag = true;
2517                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2518         }
2519
2520         if (posted) {
2521                 atomic_add(posted, &rxq->used);
2522                 if (rxo->rx_post_starved)
2523                         rxo->rx_post_starved = false;
2524                 do {
2525                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2526                         be_rxq_notify(adapter, rxq->id, notify);
2527                         posted -= notify;
2528                 } while (posted);
2529         } else if (atomic_read(&rxq->used) == 0) {
2530                 /* Let be_worker replenish when memory is available */
2531                 rxo->rx_post_starved = true;
2532         }
2533 }
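
     /* Each big page is carved into big_page_size / rx_frag_size fragments
      * (e.g. a 4K big page with the default 2K rx_frag_size yields two
      * frags). Only the descriptor marked last_frag stores the dma address
      * used to unmap the whole page; get_rx_page_info() merely dma_syncs
      * the intermediate fragments.
      */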
2534
2535 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2536 {
2537         struct be_queue_info *tx_cq = &txo->cq;
2538         struct be_tx_compl_info *txcp = &txo->txcp;
2539         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2540
2541         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2542                 return NULL;
2543
2544         /* Ensure load ordering of valid bit dword and other dwords below */
2545         rmb();
2546         be_dws_le_to_cpu(compl, sizeof(*compl));
2547
2548         txcp->status = GET_TX_COMPL_BITS(status, compl);
2549         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2550
2551         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2552         queue_tail_inc(tx_cq);
2553         return txcp;
2554 }
2555
2556 static u16 be_tx_compl_process(struct be_adapter *adapter,
2557                                struct be_tx_obj *txo, u16 last_index)
2558 {
2559         struct sk_buff **sent_skbs = txo->sent_skb_list;
2560         struct be_queue_info *txq = &txo->q;
2561         struct sk_buff *skb = NULL;
2562         bool unmap_skb_hdr = false;
2563         struct be_eth_wrb *wrb;
2564         u16 num_wrbs = 0;
2565         u32 frag_index;
2566
2567         do {
2568                 if (sent_skbs[txq->tail]) {
2569                         /* Free skb from prev req */
2570                         if (skb)
2571                                 dev_consume_skb_any(skb);
2572                         skb = sent_skbs[txq->tail];
2573                         sent_skbs[txq->tail] = NULL;
2574                         queue_tail_inc(txq);  /* skip hdr wrb */
2575                         num_wrbs++;
2576                         unmap_skb_hdr = true;
2577                 }
2578                 wrb = queue_tail_node(txq);
2579                 frag_index = txq->tail;
2580                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2581                               (unmap_skb_hdr && skb_headlen(skb)));
2582                 unmap_skb_hdr = false;
2583                 queue_tail_inc(txq);
2584                 num_wrbs++;
2585         } while (frag_index != last_index);
2586         dev_consume_skb_any(skb);
2587
2588         return num_wrbs;
2589 }
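
     /* A TX completion carries only the last WRB index of a request; the
      * walk above consumes the header WRB (tracked via sent_skbs[]) plus
      * all fragment WRBs up to and including last_index, returning the
      * count so the caller can credit it back to txq->used.
      */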
2590
2591 /* Return the number of events in the event queue */
2592 static inline int events_get(struct be_eq_obj *eqo)
2593 {
2594         struct be_eq_entry *eqe;
2595         int num = 0;
2596
2597         do {
2598                 eqe = queue_tail_node(&eqo->q);
2599                 if (eqe->evt == 0)
2600                         break;
2601
2602                 rmb();
2603                 eqe->evt = 0;
2604                 num++;
2605                 queue_tail_inc(&eqo->q);
2606         } while (true);
2607
2608         return num;
2609 }
2610
2611 /* Leaves the EQ in a disarmed state */
2612 static void be_eq_clean(struct be_eq_obj *eqo)
2613 {
2614         int num = events_get(eqo);
2615
2616         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2617 }
2618
2619 /* Free posted rx buffers that were not used */
2620 static void be_rxq_clean(struct be_rx_obj *rxo)
2621 {
2622         struct be_queue_info *rxq = &rxo->q;
2623         struct be_rx_page_info *page_info;
2624
2625         while (atomic_read(&rxq->used) > 0) {
2626                 page_info = get_rx_page_info(rxo);
2627                 put_page(page_info->page);
2628                 memset(page_info, 0, sizeof(*page_info));
2629         }
2630         BUG_ON(atomic_read(&rxq->used));
2631         rxq->tail = 0;
2632         rxq->head = 0;
2633 }
2634
2635 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2636 {
2637         struct be_queue_info *rx_cq = &rxo->cq;
2638         struct be_rx_compl_info *rxcp;
2639         struct be_adapter *adapter = rxo->adapter;
2640         int flush_wait = 0;
2641
2642         /* Consume pending rx completions.
2643          * Wait for the flush completion (identified by zero num_rcvd)
2644          * to arrive. Notify CQ even when there are no more CQ entries
2645          * for HW to flush partially coalesced CQ entries.
2646          * In Lancer, there is no need to wait for flush compl.
2647          */
2648         for (;;) {
2649                 rxcp = be_rx_compl_get(rxo);
2650                 if (!rxcp) {
2651                         if (lancer_chip(adapter))
2652                                 break;
2653
2654                         if (flush_wait++ > 50 ||
2655                             be_check_error(adapter,
2656                                            BE_ERROR_HW)) {
2657                                 dev_warn(&adapter->pdev->dev,
2658                                          "did not receive flush compl\n");
2659                                 break;
2660                         }
2661                         be_cq_notify(adapter, rx_cq->id, true, 0);
2662                         mdelay(1);
2663                 } else {
2664                         be_rx_compl_discard(rxo, rxcp);
2665                         be_cq_notify(adapter, rx_cq->id, false, 1);
2666                         if (rxcp->num_rcvd == 0)
2667                                 break;
2668                 }
2669         }
2670
2671         /* After cleanup, leave the CQ in unarmed state */
2672         be_cq_notify(adapter, rx_cq->id, false, 0);
2673 }
2674
2675 static void be_tx_compl_clean(struct be_adapter *adapter)
2676 {
2677         struct device *dev = &adapter->pdev->dev;
2678         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2679         struct be_tx_compl_info *txcp;
2680         struct be_queue_info *txq;
2681         u32 end_idx, notified_idx;
2682         struct be_tx_obj *txo;
2683         int i, pending_txqs;
2684
2685         /* Stop polling for compls when HW has been silent for 10ms */
2686         do {
2687                 pending_txqs = adapter->num_tx_qs;
2688
2689                 for_all_tx_queues(adapter, txo, i) {
2690                         cmpl = 0;
2691                         num_wrbs = 0;
2692                         txq = &txo->q;
2693                         while ((txcp = be_tx_compl_get(txo))) {
2694                                 num_wrbs +=
2695                                         be_tx_compl_process(adapter, txo,
2696                                                             txcp->end_index);
2697                                 cmpl++;
2698                         }
2699                         if (cmpl) {
2700                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2701                                 atomic_sub(num_wrbs, &txq->used);
2702                                 timeo = 0;
2703                         }
2704                         if (!be_is_tx_compl_pending(txo))
2705                                 pending_txqs--;
2706                 }
2707
2708                 if (pending_txqs == 0 || ++timeo > 10 ||
2709                     be_check_error(adapter, BE_ERROR_HW))
2710                         break;
2711
2712                 mdelay(1);
2713         } while (true);
2714
2715         /* Free enqueued TX that was never notified to HW */
2716         for_all_tx_queues(adapter, txo, i) {
2717                 txq = &txo->q;
2718
2719                 if (atomic_read(&txq->used)) {
2720                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2721                                  i, atomic_read(&txq->used));
2722                         notified_idx = txq->tail;
2723                         end_idx = txq->tail;
2724                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2725                                   txq->len);
2726                         /* Use the tx-compl process logic to handle requests
2727                          * that were not sent to the HW.
2728                          */
2729                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2730                         atomic_sub(num_wrbs, &txq->used);
2731                         BUG_ON(atomic_read(&txq->used));
2732                         txo->pend_wrb_cnt = 0;
2733                         /* Since hw was never notified of these requests,
2734                          * reset TXQ indices
2735                          */
2736                         txq->head = notified_idx;
2737                         txq->tail = notified_idx;
2738                 }
2739         }
2740 }
2741
2742 static void be_evt_queues_destroy(struct be_adapter *adapter)
2743 {
2744         struct be_eq_obj *eqo;
2745         int i;
2746
2747         for_all_evt_queues(adapter, eqo, i) {
2748                 if (eqo->q.created) {
2749                         be_eq_clean(eqo);
2750                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2751                         napi_hash_del(&eqo->napi);
2752                         netif_napi_del(&eqo->napi);
2753                         free_cpumask_var(eqo->affinity_mask);
2754                 }
2755                 be_queue_free(adapter, &eqo->q);
2756         }
2757 }
2758
2759 static int be_evt_queues_create(struct be_adapter *adapter)
2760 {
2761         struct be_queue_info *eq;
2762         struct be_eq_obj *eqo;
2763         struct be_aic_obj *aic;
2764         int i, rc;
2765
2766         /* Need enough EQs to service both RX and TX queues */
2767         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2768                                     max(adapter->cfg_num_rx_irqs,
2769                                         adapter->cfg_num_tx_irqs));
2770
2771         for_all_evt_queues(adapter, eqo, i) {
2772                 int numa_node = dev_to_node(&adapter->pdev->dev);
2773
2774                 aic = &adapter->aic_obj[i];
2775                 eqo->adapter = adapter;
2776                 eqo->idx = i;
2777                 aic->max_eqd = BE_MAX_EQD;
2778                 aic->enable = true;
2779
2780                 eq = &eqo->q;
2781                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2782                                     sizeof(struct be_eq_entry));
2783                 if (rc)
2784                         return rc;
2785
2786                 rc = be_cmd_eq_create(adapter, eqo);
2787                 if (rc)
2788                         return rc;
2789
2790                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2791                         return -ENOMEM;
2792                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2793                                 eqo->affinity_mask);
2794                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2795                                BE_NAPI_WEIGHT);
2796         }
2797         return 0;
2798 }
2799
2800 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2801 {
2802         struct be_queue_info *q;
2803
2804         q = &adapter->mcc_obj.q;
2805         if (q->created)
2806                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2807         be_queue_free(adapter, q);
2808
2809         q = &adapter->mcc_obj.cq;
2810         if (q->created)
2811                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2812         be_queue_free(adapter, q);
2813 }
2814
2815 /* Must be called only after TX qs are created as MCC shares TX EQ */
2816 static int be_mcc_queues_create(struct be_adapter *adapter)
2817 {
2818         struct be_queue_info *q, *cq;
2819
2820         cq = &adapter->mcc_obj.cq;
2821         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2822                            sizeof(struct be_mcc_compl)))
2823                 goto err;
2824
2825         /* Use the default EQ for MCC completions */
2826         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2827                 goto mcc_cq_free;
2828
2829         q = &adapter->mcc_obj.q;
2830         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2831                 goto mcc_cq_destroy;
2832
2833         if (be_cmd_mccq_create(adapter, q, cq))
2834                 goto mcc_q_free;
2835
2836         return 0;
2837
2838 mcc_q_free:
2839         be_queue_free(adapter, q);
2840 mcc_cq_destroy:
2841         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2842 mcc_cq_free:
2843         be_queue_free(adapter, cq);
2844 err:
2845         return -1;
2846 }
2847
2848 static void be_tx_queues_destroy(struct be_adapter *adapter)
2849 {
2850         struct be_queue_info *q;
2851         struct be_tx_obj *txo;
2852         u8 i;
2853
2854         for_all_tx_queues(adapter, txo, i) {
2855                 q = &txo->q;
2856                 if (q->created)
2857                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2858                 be_queue_free(adapter, q);
2859
2860                 q = &txo->cq;
2861                 if (q->created)
2862                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863                 be_queue_free(adapter, q);
2864         }
2865 }
2866
2867 static int be_tx_qs_create(struct be_adapter *adapter)
2868 {
2869         struct be_queue_info *cq;
2870         struct be_tx_obj *txo;
2871         struct be_eq_obj *eqo;
2872         int status, i;
2873
2874         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2875
2876         for_all_tx_queues(adapter, txo, i) {
2877                 cq = &txo->cq;
2878                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2879                                         sizeof(struct be_eth_tx_compl));
2880                 if (status)
2881                         return status;
2882
2883                 u64_stats_init(&txo->stats.sync);
2884                 u64_stats_init(&txo->stats.sync_compl);
2885
2886                 /* If num_evt_qs is less than num_tx_qs, then more than
2887                  * one TXQ shares an EQ
2888                  */
2889                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2890                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2891                 if (status)
2892                         return status;
2893
2894                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2895                                         sizeof(struct be_eth_wrb));
2896                 if (status)
2897                         return status;
2898
2899                 status = be_cmd_txq_create(adapter, txo);
2900                 if (status)
2901                         return status;
2902
2903                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2904                                     eqo->idx);
2905         }
2906
2907         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2908                  adapter->num_tx_qs);
2909         return 0;
2910 }
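
/* The "i % adapter->num_evt_qs" indexing above shares EQs round-robin when
 * there are more TXQs than EQs: with 8 TXQs on 4 EQs, TXQ0 and TXQ4 land
 * on EQ0, TXQ1 and TXQ5 on EQ1, and so on. A minimal sketch (hypothetical
 * helper, values assumed):
 */
static inline int be_sketch_txq_to_eq(int txq_idx, int num_evt_qs)
{
	return txq_idx % num_evt_qs;	/* round-robin EQ assignment */
}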
2911
2912 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2913 {
2914         struct be_queue_info *q;
2915         struct be_rx_obj *rxo;
2916         int i;
2917
2918         for_all_rx_queues(adapter, rxo, i) {
2919                 q = &rxo->cq;
2920                 if (q->created)
2921                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2922                 be_queue_free(adapter, q);
2923         }
2924 }
2925
2926 static int be_rx_cqs_create(struct be_adapter *adapter)
2927 {
2928         struct be_queue_info *eq, *cq;
2929         struct be_rx_obj *rxo;
2930         int rc, i;
2931
2932         adapter->num_rss_qs =
2933                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2934
2935         /* We'll use RSS only if at least 2 RSS rings are supported. */
2936         if (adapter->num_rss_qs < 2)
2937                 adapter->num_rss_qs = 0;
2938
2939         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2940
2941         /* When the interface is not capable of RSS rings (and there is no
2942          * need to create a default RXQ), we'll still need one RXQ
2943          */
2944         if (adapter->num_rx_qs == 0)
2945                 adapter->num_rx_qs = 1;
2946
2947         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2948         for_all_rx_queues(adapter, rxo, i) {
2949                 rxo->adapter = adapter;
2950                 cq = &rxo->cq;
2951                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2952                                     sizeof(struct be_eth_rx_compl));
2953                 if (rc)
2954                         return rc;
2955
2956                 u64_stats_init(&rxo->stats.sync);
2957                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
2958                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
2959                 if (rc)
2960                         return rc;
2961         }
2962
2963         dev_info(&adapter->pdev->dev,
2964                  "created %d RX queue(s)\n", adapter->num_rx_qs);
2965         return 0;
2966 }
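
/* Worked example of the RXQ sizing above (all values assumed): with
 * num_evt_qs == 4 and cfg_num_rx_irqs == 6, num_rss_qs = min(4, 6) = 4;
 * with need_def_rxq set, num_rx_qs = 4 + 1 = 5. Had only one RSS ring
 * been possible, RSS would be dropped (num_rss_qs = 0) and a single RXQ
 * used instead.
 */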
2967
2968 static irqreturn_t be_intx(int irq, void *dev)
2969 {
2970         struct be_eq_obj *eqo = dev;
2971         struct be_adapter *adapter = eqo->adapter;
2972         int num_evts = 0;
2973
2974         /* IRQ is not expected when NAPI is scheduled as the EQ
2975          * will not be armed.
2976          * But, this can happen on Lancer INTx where it takes
2977          * a while to de-assert INTx or in BE2 where occasionally
2978          * an interrupt may be raised even when EQ is unarmed.
2979          * If NAPI is already scheduled, then counting & notifying
2980          * events will orphan them.
2981          */
2982         if (napi_schedule_prep(&eqo->napi)) {
2983                 num_evts = events_get(eqo);
2984                 __napi_schedule(&eqo->napi);
2985                 if (num_evts)
2986                         eqo->spurious_intr = 0;
2987         }
2988         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
2989
2990         /* Return IRQ_HANDLED only for the first spurious intr
2991          * after a valid intr to stop the kernel from branding
2992          * this irq as a bad one!
2993          */
2994         if (num_evts || eqo->spurious_intr++ == 0)
2995                 return IRQ_HANDLED;
2996         else
2997                 return IRQ_NONE;
2998 }
2999
3000 static irqreturn_t be_msix(int irq, void *dev)
3001 {
3002         struct be_eq_obj *eqo = dev;
3003
3004         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3005         napi_schedule(&eqo->napi);
3006         return IRQ_HANDLED;
3007 }
3008
3009 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3010 {
3011         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3012 }
3013
3014 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3015                          int budget, int polling)
3016 {
3017         struct be_adapter *adapter = rxo->adapter;
3018         struct be_queue_info *rx_cq = &rxo->cq;
3019         struct be_rx_compl_info *rxcp;
3020         u32 work_done;
3021         u32 frags_consumed = 0;
3022
3023         for (work_done = 0; work_done < budget; work_done++) {
3024                 rxcp = be_rx_compl_get(rxo);
3025                 if (!rxcp)
3026                         break;
3027
3028                 /* Is it a flush compl that has no data? */
3029                 if (unlikely(rxcp->num_rcvd == 0))
3030                         goto loop_continue;
3031
3032                 /* Discard compls with partial DMA (Lancer B0) */
3033                 if (unlikely(!rxcp->pkt_size)) {
3034                         be_rx_compl_discard(rxo, rxcp);
3035                         goto loop_continue;
3036                 }
3037
3038                 /* On BE, drop pkts that arrive due to imperfect filtering in
3039                  * promiscuous mode on some SKUs
3040                  */
3041                 if (unlikely(rxcp->port != adapter->port_num &&
3042                              !lancer_chip(adapter))) {
3043                         be_rx_compl_discard(rxo, rxcp);
3044                         goto loop_continue;
3045                 }
3046
3047                 /* Don't do GRO when we're busy-polling */
3048                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3049                         be_rx_compl_process_gro(rxo, napi, rxcp);
3050                 else
3051                         be_rx_compl_process(rxo, napi, rxcp);
3052
3053 loop_continue:
3054                 frags_consumed += rxcp->num_rcvd;
3055                 be_rx_stats_update(rxo, rxcp);
3056         }
3057
3058         if (work_done) {
3059                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3060
3061                 /* When an rx-obj gets into post_starved state, just
3062                  * let be_worker do the posting.
3063                  */
3064                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3065                     !rxo->rx_post_starved)
3066                         be_post_rx_frags(rxo, GFP_ATOMIC,
3067                                          max_t(u32, MAX_RX_POST,
3068                                                frags_consumed));
3069         }
3070
3071         return work_done;
3072 }
3073
3074 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3075 {
3076         switch (status) {
3077         case BE_TX_COMP_HDR_PARSE_ERR:
3078                 tx_stats(txo)->tx_hdr_parse_err++;
3079                 break;
3080         case BE_TX_COMP_NDMA_ERR:
3081                 tx_stats(txo)->tx_dma_err++;
3082                 break;
3083         case BE_TX_COMP_ACL_ERR:
3084                 tx_stats(txo)->tx_spoof_check_err++;
3085                 break;
3086         }
3087 }
3088
3089 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3090 {
3091         switch (status) {
3092         case LANCER_TX_COMP_LSO_ERR:
3093                 tx_stats(txo)->tx_tso_err++;
3094                 break;
3095         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3096         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3097                 tx_stats(txo)->tx_spoof_check_err++;
3098                 break;
3099         case LANCER_TX_COMP_QINQ_ERR:
3100                 tx_stats(txo)->tx_qinq_err++;
3101                 break;
3102         case LANCER_TX_COMP_PARITY_ERR:
3103                 tx_stats(txo)->tx_internal_parity_err++;
3104                 break;
3105         case LANCER_TX_COMP_DMA_ERR:
3106                 tx_stats(txo)->tx_dma_err++;
3107                 break;
3108         }
3109 }
3110
3111 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3112                           int idx)
3113 {
3114         int num_wrbs = 0, work_done = 0;
3115         struct be_tx_compl_info *txcp;
3116
3117         while ((txcp = be_tx_compl_get(txo))) {
3118                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3119                 work_done++;
3120
3121                 if (txcp->status) {
3122                         if (lancer_chip(adapter))
3123                                 lancer_update_tx_err(txo, txcp->status);
3124                         else
3125                                 be_update_tx_err(txo, txcp->status);
3126                 }
3127         }
3128
3129         if (work_done) {
3130                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3131                 atomic_sub(num_wrbs, &txo->q.used);
3132
3133                 /* As Tx wrbs have been freed up, wake up netdev queue
3134                  * if it was stopped due to lack of tx wrbs. */
3135                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3136                     be_can_txq_wake(txo)) {
3137                         netif_wake_subqueue(adapter->netdev, idx);
3138                 }
3139
3140                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3141                 tx_stats(txo)->tx_compl += work_done;
3142                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3143         }
3144 }
3145
3146 #ifdef CONFIG_NET_RX_BUSY_POLL
3147 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3148 {
3149         bool status = true;
3150
3151         spin_lock(&eqo->lock); /* BH is already disabled */
3152         if (eqo->state & BE_EQ_LOCKED) {
3153                 WARN_ON(eqo->state & BE_EQ_NAPI);
3154                 eqo->state |= BE_EQ_NAPI_YIELD;
3155                 status = false;
3156         } else {
3157                 eqo->state = BE_EQ_NAPI;
3158         }
3159         spin_unlock(&eqo->lock);
3160         return status;
3161 }
3162
3163 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3164 {
3165         spin_lock(&eqo->lock); /* BH is already disabled */
3166
3167         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3168         eqo->state = BE_EQ_IDLE;
3169
3170         spin_unlock(&eqo->lock);
3171 }
3172
3173 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3174 {
3175         bool status = true;
3176
3177         spin_lock_bh(&eqo->lock);
3178         if (eqo->state & BE_EQ_LOCKED) {
3179                 eqo->state |= BE_EQ_POLL_YIELD;
3180                 status = false;
3181         } else {
3182                 eqo->state |= BE_EQ_POLL;
3183         }
3184         spin_unlock_bh(&eqo->lock);
3185         return status;
3186 }
3187
3188 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3189 {
3190         spin_lock_bh(&eqo->lock);
3191
3192         WARN_ON(eqo->state & (BE_EQ_NAPI));
3193         eqo->state = BE_EQ_IDLE;
3194
3195         spin_unlock_bh(&eqo->lock);
3196 }
3197
3198 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3199 {
3200         spin_lock_init(&eqo->lock);
3201         eqo->state = BE_EQ_IDLE;
3202 }
3203
3204 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3205 {
3206         local_bh_disable();
3207
3208         /* It's enough to just acquire napi lock on the eqo to stop
3209          * be_busy_poll() from processing any queueus.
3210          * be_busy_poll() from processing any queues.
3211         while (!be_lock_napi(eqo))
3212                 mdelay(1);
3213
3214         local_bh_enable();
3215 }
3216
3217 #else /* CONFIG_NET_RX_BUSY_POLL */
3218
3219 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3220 {
3221         return true;
3222 }
3223
3224 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3225 {
3226 }
3227
3228 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3229 {
3230         return false;
3231 }
3232
3233 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3234 {
3235 }
3236
3237 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3238 {
3239 }
3240
3241 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3242 {
3243 }
3244 #endif /* CONFIG_NET_RX_BUSY_POLL */
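
/* Summary of the eqo->state handshake implemented above (restates the
 * code, adds no behaviour): NAPI and busy-poll each try to take the EQ,
 * and the loser records a *_YIELD flag so the holder knows it raced.
 *
 *   BE_EQ_IDLE -- be_lock_napi() ------> BE_EQ_NAPI -- be_unlock_napi() -----> BE_EQ_IDLE
 *   BE_EQ_IDLE -- be_lock_busy_poll() -> BE_EQ_POLL -- be_unlock_busy_poll() -> BE_EQ_IDLE
 *   already locked + NAPI attempt -> state |= BE_EQ_NAPI_YIELD, NAPI backs off
 *   already locked + poll attempt -> state |= BE_EQ_POLL_YIELD, poll reports busy
 */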
3245
3246 int be_poll(struct napi_struct *napi, int budget)
3247 {
3248         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3249         struct be_adapter *adapter = eqo->adapter;
3250         int max_work = 0, work, i, num_evts;
3251         struct be_rx_obj *rxo;
3252         struct be_tx_obj *txo;
3253         u32 mult_enc = 0;
3254
3255         num_evts = events_get(eqo);
3256
3257         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3258                 be_process_tx(adapter, txo, i);
3259
3260         if (be_lock_napi(eqo)) {
3261                 /* This loop will iterate twice for EQ0 in which
3262                  * completions of the last RXQ (default one) are also processed.
3263                  * For other EQs the loop iterates only once.
3264                  */
3265                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3266                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3267                         max_work = max(work, max_work);
3268                 }
3269                 be_unlock_napi(eqo);
3270         } else {
3271                 max_work = budget;
3272         }
3273
3274         if (is_mcc_eqo(eqo))
3275                 be_process_mcc(adapter);
3276
3277         if (max_work < budget) {
3278                 napi_complete(napi);
3279
3280                 /* Skyhawk EQ_DB has a provision to set the rearm-to-interrupt
3281                  * delay via a delay multiplier encoding value
3282                  */
3283                 if (skyhawk_chip(adapter))
3284                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3285
3286                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3287                              mult_enc);
3288         } else {
3289                 /* As we'll continue in polling mode, count and clear events */
3290                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3291         }
3292         return max_work;
3293 }
3294
3295 #ifdef CONFIG_NET_RX_BUSY_POLL
3296 static int be_busy_poll(struct napi_struct *napi)
3297 {
3298         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3299         struct be_adapter *adapter = eqo->adapter;
3300         struct be_rx_obj *rxo;
3301         int i, work = 0;
3302
3303         if (!be_lock_busy_poll(eqo))
3304                 return LL_FLUSH_BUSY;
3305
3306         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3307                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3308                 if (work)
3309                         break;
3310         }
3311
3312         be_unlock_busy_poll(eqo);
3313         return work;
3314 }
3315 #endif
3316
3317 void be_detect_error(struct be_adapter *adapter)
3318 {
3319         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3320         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3321         u32 i;
3322         struct device *dev = &adapter->pdev->dev;
3323
3324         if (be_check_error(adapter, BE_ERROR_HW))
3325                 return;
3326
3327         if (lancer_chip(adapter)) {
3328                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3329                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3330                         be_set_error(adapter, BE_ERROR_UE);
3331                         sliport_err1 = ioread32(adapter->db +
3332                                                 SLIPORT_ERROR1_OFFSET);
3333                         sliport_err2 = ioread32(adapter->db +
3334                                                 SLIPORT_ERROR2_OFFSET);
3335                         /* Do not log error messages if it's a FW reset */
3336                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3337                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3338                                 dev_info(dev, "Firmware update in progress\n");
3339                         } else {
3340                                 dev_err(dev, "Error detected in the card\n");
3341                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3342                                         sliport_status);
3343                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3344                                         sliport_err1);
3345                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3346                                         sliport_err2);
3347                         }
3348                 }
3349         } else {
3350                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3351                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3352                 ue_lo_mask = ioread32(adapter->pcicfg +
3353                                       PCICFG_UE_STATUS_LOW_MASK);
3354                 ue_hi_mask = ioread32(adapter->pcicfg +
3355                                       PCICFG_UE_STATUS_HI_MASK);
3356
3357                 ue_lo = (ue_lo & ~ue_lo_mask);
3358                 ue_hi = (ue_hi & ~ue_hi_mask);
3359
3360                 /* On certain platforms BE hardware can indicate spurious UEs.
3361                  * In case of a real UE the HW is allowed to stop working on its
3362                  * own; hence hw_error is not set here on UE detection.
3363                  */
3364
3365                 if (ue_lo || ue_hi) {
3366                         dev_err(dev, "Error detected in the adapter\n");
3367                         if (skyhawk_chip(adapter))
3368                                 be_set_error(adapter, BE_ERROR_UE);
3369
3370                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3371                                 if (ue_lo & 1)
3372                                         dev_err(dev, "UE: %s bit set\n",
3373                                                 ue_status_low_desc[i]);
3374                         }
3375                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3376                                 if (ue_hi & 1)
3377                                         dev_err(dev, "UE: %s bit set\n",
3378                                                 ue_status_hi_desc[i]);
3379                         }
3380                 }
3381         }
3382 }
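
/* Hedged example of the UE decode loops above: an unmasked ue_lo of 0x9
 * (binary 1001) has bits 0 and 3 set, so the first and fourth entries of
 * ue_status_low_desc[] are reported. The shift-and-test pattern in
 * isolation (hypothetical helper):
 */
static inline void be_sketch_decode_ue(struct device *dev, u32 ue_bits,
				       const char * const *desc)
{
	u32 i;

	for (i = 0; ue_bits; ue_bits >>= 1, i++)
		if (ue_bits & 1)
			dev_err(dev, "UE: %s bit set\n", desc[i]);
}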
3383
3384 static void be_msix_disable(struct be_adapter *adapter)
3385 {
3386         if (msix_enabled(adapter)) {
3387                 pci_disable_msix(adapter->pdev);
3388                 adapter->num_msix_vec = 0;
3389                 adapter->num_msix_roce_vec = 0;
3390         }
3391 }
3392
3393 static int be_msix_enable(struct be_adapter *adapter)
3394 {
3395         unsigned int i, max_roce_eqs;
3396         struct device *dev = &adapter->pdev->dev;
3397         int num_vec;
3398
3399         /* If RoCE is supported, program the max number of vectors that
3400          * could be used for NIC and RoCE; otherwise just program the number
3401          * we'll use initially.
3402          */
3403         if (be_roce_supported(adapter)) {
3404                 max_roce_eqs =
3405                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3406                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3407                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3408         } else {
3409                 num_vec = max(adapter->cfg_num_rx_irqs,
3410                               adapter->cfg_num_tx_irqs);
3411         }
3412
3413         for (i = 0; i < num_vec; i++)
3414                 adapter->msix_entries[i].entry = i;
3415
3416         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3417                                         MIN_MSIX_VECTORS, num_vec);
3418         if (num_vec < 0)
3419                 goto fail;
3420
3421         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3422                 adapter->num_msix_roce_vec = num_vec / 2;
3423                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3424                          adapter->num_msix_roce_vec);
3425         }
3426
3427         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3428
3429         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3430                  adapter->num_msix_vec);
3431         return 0;
3432
3433 fail:
3434         dev_warn(dev, "MSIx enable failed\n");
3435
3436         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3437         if (be_virtfn(adapter))
3438                 return num_vec;
3439         return 0;
3440 }
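
/* Worked example of the vector split above (numbers assumed): if
 * pci_enable_msix_range() grants num_vec == 16 on a RoCE-capable function,
 * RoCE is given 16 / 2 == 8 vectors and the NIC keeps 16 - 8 == 8. When
 * RoCE is unsupported, num_msix_roce_vec stays 0 and the NIC keeps all 16.
 */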
3441
3442 static inline int be_msix_vec_get(struct be_adapter *adapter,
3443                                   struct be_eq_obj *eqo)
3444 {
3445         return adapter->msix_entries[eqo->msix_idx].vector;
3446 }
3447
3448 static int be_msix_register(struct be_adapter *adapter)
3449 {
3450         struct net_device *netdev = adapter->netdev;
3451         struct be_eq_obj *eqo;
3452         int status, i, vec;
3453
3454         for_all_evt_queues(adapter, eqo, i) {
3455                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3456                 vec = be_msix_vec_get(adapter, eqo);
3457                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3458                 if (status)
3459                         goto err_msix;
3460
3461                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3462         }
3463
3464         return 0;
3465 err_msix:
3466         for (i--; i >= 0; i--) {
3467                 eqo = &adapter->eq_obj[i];
3468                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3469         }
3470         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3471                  status);
3472         be_msix_disable(adapter);
3473         return status;
3474 }
3475
3476 static int be_irq_register(struct be_adapter *adapter)
3477 {
3478         struct net_device *netdev = adapter->netdev;
3479         int status;
3480
3481         if (msix_enabled(adapter)) {
3482                 status = be_msix_register(adapter);
3483                 if (status == 0)
3484                         goto done;
3485                 /* INTx is not supported for VF */
3486                 if (be_virtfn(adapter))
3487                         return status;
3488         }
3489
3490         /* INTx: only the first EQ is used */
3491         netdev->irq = adapter->pdev->irq;
3492         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3493                              &adapter->eq_obj[0]);
3494         if (status) {
3495                 dev_err(&adapter->pdev->dev,
3496                         "INTx request IRQ failed - err %d\n", status);
3497                 return status;
3498         }
3499 done:
3500         adapter->isr_registered = true;
3501         return 0;
3502 }
3503
3504 static void be_irq_unregister(struct be_adapter *adapter)
3505 {
3506         struct net_device *netdev = adapter->netdev;
3507         struct be_eq_obj *eqo;
3508         int i, vec;
3509
3510         if (!adapter->isr_registered)
3511                 return;
3512
3513         /* INTx */
3514         if (!msix_enabled(adapter)) {
3515                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3516                 goto done;
3517         }
3518
3519         /* MSIx */
3520         for_all_evt_queues(adapter, eqo, i) {
3521                 vec = be_msix_vec_get(adapter, eqo);
3522                 irq_set_affinity_hint(vec, NULL);
3523                 free_irq(vec, eqo);
3524         }
3525
3526 done:
3527         adapter->isr_registered = false;
3528 }
3529
3530 static void be_rx_qs_destroy(struct be_adapter *adapter)
3531 {
3532         struct rss_info *rss = &adapter->rss_info;
3533         struct be_queue_info *q;
3534         struct be_rx_obj *rxo;
3535         int i;
3536
3537         for_all_rx_queues(adapter, rxo, i) {
3538                 q = &rxo->q;
3539                 if (q->created) {
3540                         /* If RXQs are destroyed while in an "out of buffer"
3541                          * state, there is a possibility of an HW stall on
3542                          * Lancer. So, post 64 buffers to each queue to relieve
3543                          * the "out of buffer" condition.
3544                          * Make sure there's space in the RXQ before posting.
3545                          */
3546                         if (lancer_chip(adapter)) {
3547                                 be_rx_cq_clean(rxo);
3548                                 if (atomic_read(&q->used) == 0)
3549                                         be_post_rx_frags(rxo, GFP_KERNEL,
3550                                                          MAX_RX_POST);
3551                         }
3552
3553                         be_cmd_rxq_destroy(adapter, q);
3554                         be_rx_cq_clean(rxo);
3555                         be_rxq_clean(rxo);
3556                 }
3557                 be_queue_free(adapter, q);
3558         }
3559
3560         if (rss->rss_flags) {
3561                 rss->rss_flags = RSS_ENABLE_NONE;
3562                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3563                                   128, rss->rss_hkey);
3564         }
3565 }
3566
3567 static void be_disable_if_filters(struct be_adapter *adapter)
3568 {
3569         be_cmd_pmac_del(adapter, adapter->if_handle,
3570                         adapter->pmac_id[0], 0);
3571
3572         be_clear_uc_list(adapter);
3573         be_clear_mc_list(adapter);
3574
3575         /* The IFACE flags are enabled in the open path and cleared
3576          * in the close path. When a VF gets detached from the host and
3577          * assigned to a VM the following happens:
3578          *      - VF's IFACE flags get cleared in the detach path
3579          *      - IFACE create is issued by the VF in the attach path
3580          * Due to a bug in the BE3/Skyhawk-R FW
3581          * (Lancer FW doesn't have the bug), the IFACE capability flags
3582          * specified along with the IFACE create cmd issued by a VF are not
3583          * honoured by FW.  As a consequence, if a *new* driver
3584          * (that enables/disables IFACE flags in open/close)
3585          * honoured by FW. As a consequence, if a *new* driver
3586          * (that enables/disables IFACE flags in open/close)
3587          * is loaded in the host and an *old* driver is used by a VM/VF,
3588          */
3589         if (lancer_chip(adapter)) {
3590                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3591                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3592         }
3593 }
3594
3595 static int be_close(struct net_device *netdev)
3596 {
3597         struct be_adapter *adapter = netdev_priv(netdev);
3598         struct be_eq_obj *eqo;
3599         int i;
3600
3601         /* This protection is needed as be_close() may be called even when the
3602          * adapter is in a cleared state (after an EEH permanent failure)
3603          */
3604         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3605                 return 0;
3606
3607         /* Before attempting cleanup ensure all the pending cmds in the
3608          * config_wq have finished execution
3609          */
3610         flush_workqueue(be_wq);
3611
3612         be_disable_if_filters(adapter);
3613
3614         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3615                 for_all_evt_queues(adapter, eqo, i) {
3616                         napi_disable(&eqo->napi);
3617                         be_disable_busy_poll(eqo);
3618                 }
3619                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3620         }
3621
3622         be_async_mcc_disable(adapter);
3623
3624         /* Wait for all pending tx completions to arrive so that
3625          * all tx skbs are freed.
3626          */
3627         netif_tx_disable(netdev);
3628         be_tx_compl_clean(adapter);
3629
3630         be_rx_qs_destroy(adapter);
3631
3632         for_all_evt_queues(adapter, eqo, i) {
3633                 if (msix_enabled(adapter))
3634                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3635                 else
3636                         synchronize_irq(netdev->irq);
3637                 be_eq_clean(eqo);
3638         }
3639
3640         be_irq_unregister(adapter);
3641
3642         return 0;
3643 }
3644
3645 static int be_rx_qs_create(struct be_adapter *adapter)
3646 {
3647         struct rss_info *rss = &adapter->rss_info;
3648         u8 rss_key[RSS_HASH_KEY_LEN];
3649         struct be_rx_obj *rxo;
3650         int rc, i, j;
3651
3652         for_all_rx_queues(adapter, rxo, i) {
3653                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3654                                     sizeof(struct be_eth_rx_d));
3655                 if (rc)
3656                         return rc;
3657         }
3658
3659         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3660                 rxo = default_rxo(adapter);
3661                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3662                                        rx_frag_size, adapter->if_handle,
3663                                        false, &rxo->rss_id);
3664                 if (rc)
3665                         return rc;
3666         }
3667
3668         for_all_rss_queues(adapter, rxo, i) {
3669                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3670                                        rx_frag_size, adapter->if_handle,
3671                                        true, &rxo->rss_id);
3672                 if (rc)
3673                         return rc;
3674         }
3675
3676         if (be_multi_rxq(adapter)) {
3677                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3678                         for_all_rss_queues(adapter, rxo, i) {
3679                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3680                                         break;
3681                                 rss->rsstable[j + i] = rxo->rss_id;
3682                                 rss->rss_queue[j + i] = i;
3683                         }
3684                 }
3685                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3686                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3687
3688                 if (!BEx_chip(adapter))
3689                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3690                                 RSS_ENABLE_UDP_IPV6;
3691
3692                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3693                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3694                                        RSS_INDIR_TABLE_LEN, rss_key);
3695                 if (rc) {
3696                         rss->rss_flags = RSS_ENABLE_NONE;
3697                         return rc;
3698                 }
3699
3700                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3701         } else {
3702                 /* Disable RSS if only the default RXQ is created */
3703                 rss->rss_flags = RSS_ENABLE_NONE;
3704         }
3705
3706
3707         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3708          * which is a queue empty condition
3709          */
3710         for_all_rx_queues(adapter, rxo, i)
3711                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3712
3713         return 0;
3714 }
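
/* The nested loops above fill the RSS_INDIR_TABLE_LEN-entry indirection
 * table round-robin with the rss_id of each ring: with num_rss_qs == 3 the
 * table reads 0,1,2,0,1,2,... so flows hash evenly across the rings. A
 * hypothetical stand-alone sketch, using ring indices in place of the
 * HW-assigned rss_id values:
 */
static inline void be_sketch_fill_rsstable(u8 *table, int table_len,
					   int num_rss_qs)
{
	int i;

	for (i = 0; i < table_len; i++)
		table[i] = i % num_rss_qs;	/* round-robin ring index */
}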
3715
3716 static int be_enable_if_filters(struct be_adapter *adapter)
3717 {
3718         int status;
3719
3720         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3721         if (status)
3722                 return status;
3723
3724         /* For BE3 VFs, the PF programs the initial MAC address */
3725         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3726                 status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
3727                                          adapter->if_handle,
3728                                          &adapter->pmac_id[0], 0);
3729                 if (status)
3730                         return status;
3731         }
3732
3733         if (adapter->vlans_added)
3734                 be_vid_config(adapter);
3735
3736         __be_set_rx_mode(adapter);
3737
3738         return 0;
3739 }
3740
3741 static int be_open(struct net_device *netdev)
3742 {
3743         struct be_adapter *adapter = netdev_priv(netdev);
3744         struct be_eq_obj *eqo;
3745         struct be_rx_obj *rxo;
3746         struct be_tx_obj *txo;
3747         u8 link_status;
3748         int status, i;
3749
3750         status = be_rx_qs_create(adapter);
3751         if (status)
3752                 goto err;
3753
3754         status = be_enable_if_filters(adapter);
3755         if (status)
3756                 goto err;
3757
3758         status = be_irq_register(adapter);
3759         if (status)
3760                 goto err;
3761
3762         for_all_rx_queues(adapter, rxo, i)
3763                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3764
3765         for_all_tx_queues(adapter, txo, i)
3766                 be_cq_notify(adapter, txo->cq.id, true, 0);
3767
3768         be_async_mcc_enable(adapter);
3769
3770         for_all_evt_queues(adapter, eqo, i) {
3771                 napi_enable(&eqo->napi);
3772                 be_enable_busy_poll(eqo);
3773                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3774         }
3775         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3776
3777         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3778         if (!status)
3779                 be_link_status_update(adapter, link_status);
3780
3781         netif_tx_start_all_queues(netdev);
3782         if (skyhawk_chip(adapter))
3783                 udp_tunnel_get_rx_info(netdev);
3784
3785         return 0;
3786 err:
3787         be_close(adapter->netdev);
3788         return -EIO;
3789 }
3790
3791 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3792 {
3793         u32 addr;
3794
3795         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3796
3797         mac[5] = (u8)(addr & 0xFF);
3798         mac[4] = (u8)((addr >> 8) & 0xFF);
3799         mac[3] = (u8)((addr >> 16) & 0xFF);
3800         /* Use the OUI from the current MAC address */
3801         memcpy(mac, adapter->netdev->dev_addr, 3);
3802 }
3803
3804 /*
3805  * Generate a seed MAC address from the PF MAC Address using jhash.
3806  * MAC addresses for VFs are assigned incrementally starting from the seed.
3807  * These addresses are programmed in the ASIC by the PF and the VF driver
3808  * queries for the MAC address during its probe.
3809  */
3810 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3811 {
3812         u32 vf;
3813         int status = 0;
3814         u8 mac[ETH_ALEN];
3815         struct be_vf_cfg *vf_cfg;
3816
3817         be_vf_eth_addr_generate(adapter, mac);
3818
3819         for_all_vfs(adapter, vf_cfg, vf) {
3820                 if (BEx_chip(adapter))
3821                         status = be_cmd_pmac_add(adapter, mac,
3822                                                  vf_cfg->if_handle,
3823                                                  &vf_cfg->pmac_id, vf + 1);
3824                 else
3825                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3826                                                 vf + 1);
3827
3828                 if (status)
3829                         dev_err(&adapter->pdev->dev,
3830                                 "MAC address assignment failed for VF %d\n",
3831                                 vf);
3832                 else
3833                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3834
3835                 mac[5] += 1;
3836         }
3837         return status;
3838 }
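
/* Example of the incremental assignment above (addresses assumed): with a
 * seed of 00:11:22:aa:bb:10, VF0 is given ...:10, VF1 ...:11, VF2 ...:12.
 * Only mac[5] is incremented, so the scheme relies on num_vfs being far
 * smaller than 256; a wrap of the last octet would not carry into mac[4].
 */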
3839
3840 static int be_vfs_mac_query(struct be_adapter *adapter)
3841 {
3842         int status, vf;
3843         u8 mac[ETH_ALEN];
3844         struct be_vf_cfg *vf_cfg;
3845
3846         for_all_vfs(adapter, vf_cfg, vf) {
3847                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3848                                                mac, vf_cfg->if_handle,
3849                                                false, vf+1);
3850                 if (status)
3851                         return status;
3852                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3853         }
3854         return 0;
3855 }
3856
3857 static void be_vf_clear(struct be_adapter *adapter)
3858 {
3859         struct be_vf_cfg *vf_cfg;
3860         u32 vf;
3861
3862         if (pci_vfs_assigned(adapter->pdev)) {
3863                 dev_warn(&adapter->pdev->dev,
3864                          "VFs are assigned to VMs: not disabling VFs\n");
3865                 goto done;
3866         }
3867
3868         pci_disable_sriov(adapter->pdev);
3869
3870         for_all_vfs(adapter, vf_cfg, vf) {
3871                 if (BEx_chip(adapter))
3872                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3873                                         vf_cfg->pmac_id, vf + 1);
3874                 else
3875                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3876                                        vf + 1);
3877
3878                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3879         }
3880
3881         if (BE3_chip(adapter))
3882                 be_cmd_set_hsw_config(adapter, 0, 0,
3883                                       adapter->if_handle,
3884                                       PORT_FWD_TYPE_PASSTHRU, 0);
3885 done:
3886         kfree(adapter->vf_cfg);
3887         adapter->num_vfs = 0;
3888         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3889 }
3890
3891 static void be_clear_queues(struct be_adapter *adapter)
3892 {
3893         be_mcc_queues_destroy(adapter);
3894         be_rx_cqs_destroy(adapter);
3895         be_tx_queues_destroy(adapter);
3896         be_evt_queues_destroy(adapter);
3897 }
3898
3899 static void be_cancel_worker(struct be_adapter *adapter)
3900 {
3901         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3902                 cancel_delayed_work_sync(&adapter->work);
3903                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3904         }
3905 }
3906
3907 static void be_cancel_err_detection(struct be_adapter *adapter)
3908 {
3909         struct be_error_recovery *err_rec = &adapter->error_recovery;
3910
3911         if (!be_err_recovery_workq)
3912                 return;
3913
3914         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3915                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3916                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3917         }
3918 }
3919
3920 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3921 {
3922         struct net_device *netdev = adapter->netdev;
3923
3924         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3925                 be_cmd_manage_iface(adapter, adapter->if_handle,
3926                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3927
3928         if (adapter->vxlan_port)
3929                 be_cmd_set_vxlan_port(adapter, 0);
3930
3931         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3932         adapter->vxlan_port = 0;
3933
3934         netdev->hw_enc_features = 0;
3935         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3936         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3937 }
3938
3939 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3940                                 struct be_resources *vft_res)
3941 {
3942         struct be_resources res = adapter->pool_res;
3943         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3944         struct be_resources res_mod = {0};
3945         u16 num_vf_qs = 1;
3946
3947         /* Distribute the queue resources among the PF and its VFs */
3948         if (num_vfs) {
3949                 /* Divide the rx queues evenly among the VFs and the PF, capped
3950                  * at VF-EQ-count. Any remainder queues belong to the PF.
3951                  */
3952                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3953                                 res.max_rss_qs / (num_vfs + 1));
3954
3955                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3956                  * RSS Tables per port. Provide RSS on VFs only if the number of
3957                  * VFs requested is less than its PF pool's RSS Tables limit.
3958                  */
3959                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3960                         num_vf_qs = 1;
3961         }
3962
3963         /* GET_PROFILE_CONFIG returns all '1's in the resource fields that
3964          * are modifiable using the SET_PROFILE_CONFIG cmd.
3965          */
3966         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3967                                   RESOURCE_MODIFIABLE, 0);
3968
3969         /* If RSS IFACE capability flags are modifiable for a VF, set the
3970          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3971          * more than 1 RSSQ is available for a VF.
3972          * Otherwise, provision only 1 queue pair for VF.
3973          */
3974         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3975                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3976                 if (num_vf_qs > 1) {
3977                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3978                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3979                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3980                 } else {
3981                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3982                                              BE_IF_FLAGS_DEFQ_RSS);
3983                 }
3984         } else {
3985                 num_vf_qs = 1;
3986         }
3987
3988         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3989                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3990                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3991         }
3992
3993         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3994         vft_res->max_rx_qs = num_vf_qs;
3995         vft_res->max_rss_qs = num_vf_qs;
3996         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3997         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3998
3999         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4000          * among the PF and its VFs, if the fields are changeable
4001          */
4002         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4003                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4004
4005         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4006                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4007
4008         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4009                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4010
4011         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4012                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4013 }
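
/* Worked example of the distribution above (all limits assumed): with
 * num_vfs == 4 and res.max_rss_qs == 20, num_vf_qs = min(SH_VF_MAX_NIC_EQS,
 * 20 / (4 + 1)) queues per function; likewise res.max_tx_qs == 30 gives
 * 30 / 5 == 6 TXQs each for the PF and every VF.
 */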
4014
4015 static void be_if_destroy(struct be_adapter *adapter)
4016 {
4017         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4018
4019         kfree(adapter->pmac_id);
4020         adapter->pmac_id = NULL;
4021
4022         kfree(adapter->mc_list);
4023         adapter->mc_list = NULL;
4024
4025         kfree(adapter->uc_list);
4026         adapter->uc_list = NULL;
4027 }
4028
4029 static int be_clear(struct be_adapter *adapter)
4030 {
4031         struct pci_dev *pdev = adapter->pdev;
4032         struct  be_resources vft_res = {0};
4033
4034         be_cancel_worker(adapter);
4035
4036         flush_workqueue(be_wq);
4037
4038         if (sriov_enabled(adapter))
4039                 be_vf_clear(adapter);
4040
4041         /* Re-configure FW to distribute resources evenly across max-supported
4042          * number of VFs, only when VFs are not already enabled.
4043          */
4044         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4045             !pci_vfs_assigned(pdev)) {
4046                 be_calculate_vf_res(adapter,
4047                                     pci_sriov_get_totalvfs(pdev),
4048                                     &vft_res);
4049                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4050                                         pci_sriov_get_totalvfs(pdev),
4051                                         &vft_res);
4052         }
4053
4054         be_disable_vxlan_offloads(adapter);
4055
4056         be_if_destroy(adapter);
4057
4058         be_clear_queues(adapter);
4059
4060         be_msix_disable(adapter);
4061         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4062         return 0;
4063 }
4064
4065 static int be_vfs_if_create(struct be_adapter *adapter)
4066 {
4067         struct be_resources res = {0};
4068         u32 cap_flags, en_flags, vf;
4069         struct be_vf_cfg *vf_cfg;
4070         int status;
4071
4072         /* If a FW profile exists, then cap_flags are updated */
4073         cap_flags = BE_VF_IF_EN_FLAGS;
4074
4075         for_all_vfs(adapter, vf_cfg, vf) {
4076                 if (!BE3_chip(adapter)) {
4077                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4078                                                            ACTIVE_PROFILE_TYPE,
4079                                                            RESOURCE_LIMITS,
4080                                                            vf + 1);
4081                         if (!status) {
4082                                 cap_flags = res.if_cap_flags;
4083                                 /* Prevent VFs from enabling VLAN promiscuous
4084                                  * mode
4085                                  */
4086                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4087                         }
4088                 }
4089
4090                 /* PF should enable IF flags during proxy if_create call */
4091                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4092                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4093                                           &vf_cfg->if_handle, vf + 1);
4094                 if (status)
4095                         return status;
4096         }
4097
4098         return 0;
4099 }
4100
4101 static int be_vf_setup_init(struct be_adapter *adapter)
4102 {
4103         struct be_vf_cfg *vf_cfg;
4104         int vf;
4105
4106         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4107                                   GFP_KERNEL);
4108         if (!adapter->vf_cfg)
4109                 return -ENOMEM;
4110
4111         for_all_vfs(adapter, vf_cfg, vf) {
4112                 vf_cfg->if_handle = -1;
4113                 vf_cfg->pmac_id = -1;
4114         }
4115         return 0;
4116 }
4117
4118 static int be_vf_setup(struct be_adapter *adapter)
4119 {
4120         struct device *dev = &adapter->pdev->dev;
4121         struct be_vf_cfg *vf_cfg;
4122         int status, old_vfs, vf;
4123         bool spoofchk;
4124
4125         old_vfs = pci_num_vf(adapter->pdev);
4126
4127         status = be_vf_setup_init(adapter);
4128         if (status)
4129                 goto err;
4130
4131         if (old_vfs) {
4132                 for_all_vfs(adapter, vf_cfg, vf) {
4133                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4134                         if (status)
4135                                 goto err;
4136                 }
4137
4138                 status = be_vfs_mac_query(adapter);
4139                 if (status)
4140                         goto err;
4141         } else {
4142                 status = be_vfs_if_create(adapter);
4143                 if (status)
4144                         goto err;
4145
4146                 status = be_vf_eth_addr_config(adapter);
4147                 if (status)
4148                         goto err;
4149         }
4150
4151         for_all_vfs(adapter, vf_cfg, vf) {
4152                 /* Allow VFs to program MAC/VLAN filters */
4153                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4154                                                   vf + 1);
4155                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4156                         status = be_cmd_set_fn_privileges(adapter,
4157                                                           vf_cfg->privileges |
4158                                                           BE_PRIV_FILTMGMT,
4159                                                           vf + 1);
4160                         if (!status) {
4161                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4162                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4163                                          vf);
4164                         }
4165                 }
4166
4167                 /* Allow full available bandwidth */
4168                 if (!old_vfs)
4169                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4170
4171                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4172                                                vf_cfg->if_handle, NULL,
4173                                                &spoofchk);
4174                 if (!status)
4175                         vf_cfg->spoofchk = spoofchk;
4176
4177                 if (!old_vfs) {
4178                         be_cmd_enable_vf(adapter, vf + 1);
4179                         be_cmd_set_logical_link_config(adapter,
4180                                                        IFLA_VF_LINK_STATE_AUTO,
4181                                                        vf+1);
4182                 }
4183         }
4184
4185         if (!old_vfs) {
4186                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4187                 if (status) {
4188                         dev_err(dev, "SRIOV enable failed\n");
4189                         adapter->num_vfs = 0;
4190                         goto err;
4191                 }
4192         }
4193
4194         if (BE3_chip(adapter)) {
4195                 /* On BE3, enable VEB only when SRIOV is enabled */
4196                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4197                                                adapter->if_handle,
4198                                                PORT_FWD_TYPE_VEB, 0);
4199                 if (status)
4200                         goto err;
4201         }
4202
4203         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4204         return 0;
4205 err:
4206         dev_err(dev, "VF setup failed\n");
4207         be_vf_clear(adapter);
4208         return status;
4209 }
4210
4211 /* Converting function_mode bits on BE3 to SH mc_type enums */
4212
4213 static u8 be_convert_mc_type(u32 function_mode)
4214 {
4215         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4216                 return vNIC1;
4217         else if (function_mode & QNQ_MODE)
4218                 return FLEX10;
4219         else if (function_mode & VNIC_MODE)
4220                 return vNIC2;
4221         else if (function_mode & UMC_ENABLED)
4222                 return UMC;
4223         else
4224                 return MC_NONE;
4225 }
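
/* The precedence above, restated as a table (adds nothing new):
 *
 *   VNIC_MODE && QNQ_MODE  -> vNIC1
 *   QNQ_MODE only          -> FLEX10
 *   VNIC_MODE only         -> vNIC2
 *   UMC_ENABLED            -> UMC
 *   none of the above      -> MC_NONE
 */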
4226
4227 /* On BE2/BE3 the FW does not report the supported limits */
4228 static void BEx_get_resources(struct be_adapter *adapter,
4229                               struct be_resources *res)
4230 {
4231         bool use_sriov = adapter->num_vfs ? true : false;
4232
4233         if (be_physfn(adapter))
4234                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4235         else
4236                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4237
4238         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4239
4240         if (be_is_mc(adapter)) {
4241                 /* Assuming that there are 4 channels per port
4242                  * when multi-channel is enabled
4243                  */
4244                 if (be_is_qnq_mode(adapter))
4245                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4246                 else
4247                         /* In a non-qnq multichannel mode, the pvid
4248                          * takes up one vlan entry
4249                          */
4250                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4251         } else {
4252                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4253         }
4254
4255         res->max_mcast_mac = BE_MAX_MC;
4256
4257         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4258          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4259          *    *only* if it is RSS-capable.
4260          */
4261         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4262             be_virtfn(adapter) ||
4263             (be_is_mc(adapter) &&
4264              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4265                 res->max_tx_qs = 1;
4266         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4267                 struct be_resources super_nic_res = {0};
4268
4269                 /* On a SuperNIC profile, the driver needs to use the
4270                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4271                  */
4272                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4273                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4274                                           0);
4275                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4276                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4277         } else {
4278                 res->max_tx_qs = BE3_MAX_TX_QS;
4279         }
4280
4281         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4282             !use_sriov && be_physfn(adapter))
4283                 res->max_rss_qs = (adapter->be3_native) ?
4284                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4285         res->max_rx_qs = res->max_rss_qs + 1;
4286
4287         if (be_physfn(adapter))
4288                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4289                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4290         else
4291                 res->max_evt_qs = 1;
4292
4293         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4294         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4295         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4296                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4297 }
4298
4299 static void be_setup_init(struct be_adapter *adapter)
4300 {
4301         adapter->vlan_prio_bmap = 0xff;
4302         adapter->phy.link_speed = -1;
4303         adapter->if_handle = -1;
4304         adapter->be3_native = false;
4305         adapter->if_flags = 0;
4306         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4307         if (be_physfn(adapter))
4308                 adapter->cmd_privileges = MAX_PRIVILEGES;
4309         else
4310                 adapter->cmd_privileges = MIN_PRIVILEGES;
4311 }
4312
4313 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4314  * However, this HW limitation is not exposed to the host via any SLI cmd.
4315  * As a result, in the case of SRIOV and in particular multi-partition configs,
4316  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4317  * for distribution between the VFs. This self-imposed limit will determine the
4318  * number of VFs for which RSS can be enabled.
4319  */
4320 void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4321 {
4322         struct be_port_resources port_res = {0};
4323         u8 rss_tables_on_port;
4324         u16 max_vfs = be_max_vfs(adapter);
4325
4326         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4327                                   RESOURCE_LIMITS, 0);
4328
4329         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4330
4331         /* Each PF Pool's RSS Tables limit =
4332          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4333          */
4334         adapter->pool_res.max_rss_tables =
4335                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4336 }
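
/* Worked example for the formula above (illustrative numbers only, not taken
 * from any real profile): with MAX_PORT_RSS_TABLES = 15, port_res.nic_pfs = 2
 * and port_res.max_vfs = 32, a PF whose own max_vfs is 16 gets
 * 16 * (15 - 2) / 32 = 6 RSS Policy Tables for its VF pool.
 */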
4337
4338 static int be_get_sriov_config(struct be_adapter *adapter)
4339 {
4340         struct be_resources res = {0};
4341         int max_vfs, old_vfs;
4342
4343         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4344                                   RESOURCE_LIMITS, 0);
4345
4346         /* Some old versions of BE3 FW don't report max_vfs value */
4347         if (BE3_chip(adapter) && !res.max_vfs) {
4348                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4349                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4350         }
4351
4352         adapter->pool_res = res;
4353
4354         /* If the VFs were not disabled during the previous unload of the
4355          * driver, then we cannot rely on the PF POOL limits for the TotalVFs
4356          * value. Instead, use the TotalVFs value stored in the pci-dev struct.
4357          */
4358         old_vfs = pci_num_vf(adapter->pdev);
4359         if (old_vfs) {
4360                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4361                          old_vfs);
4362
4363                 adapter->pool_res.max_vfs =
4364                         pci_sriov_get_totalvfs(adapter->pdev);
4365                 adapter->num_vfs = old_vfs;
4366         }
4367
4368         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4369                 be_calculate_pf_pool_rss_tables(adapter);
4370                 dev_info(&adapter->pdev->dev,
4371                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4372                          be_max_pf_pool_rss_tables(adapter));
4373         }
4374         return 0;
4375 }
4376
4377 static void be_alloc_sriov_res(struct be_adapter *adapter)
4378 {
4379         int old_vfs = pci_num_vf(adapter->pdev);
4380         struct be_resources vft_res = {0};
4381         int status;
4382
4383         be_get_sriov_config(adapter);
4384
4385         if (!old_vfs)
4386                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4387
4388         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4389          * resources are given to the PF during driver load, if there are no
4390          * old VFs. This facility is not available in BE3 FW; on Lancer
4391          * chips the FW does this itself.
4392          */
4393         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4394                 be_calculate_vf_res(adapter, 0, &vft_res);
4395                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4396                                                  &vft_res);
4397                 if (status)
4398                         dev_err(&adapter->pdev->dev,
4399                                 "Failed to optimize SRIOV resources\n");
4400         }
4401 }
4402
4403 static int be_get_resources(struct be_adapter *adapter)
4404 {
4405         struct device *dev = &adapter->pdev->dev;
4406         struct be_resources res = {0};
4407         int status;
4408
4409         /* For Lancer, SH, etc. read the per-function resource limits from FW.
4410          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4411          * GET_PROFILE_CONFIG returns PCI-E related (PF-pool) limits.
4412          */
4413         if (BEx_chip(adapter)) {
4414                 BEx_get_resources(adapter, &res);
4415         } else {
4416                 status = be_cmd_get_func_config(adapter, &res);
4417                 if (status)
4418                         return status;
4419
4420                 /* If a default RXQ must be created, we'll use up one RSSQ */
4421                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4422                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4423                         res.max_rss_qs -= 1;
4424         }
4425
4426         /* If RoCE is supported, stash away half the EQs for RoCE */
4427         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4428                                 res.max_evt_qs / 2 : res.max_evt_qs;
4429         adapter->res = res;
4430
4431         /* If FW supports RSS default queue, then skip creating non-RSS
4432          * queue for non-IP traffic.
4433          */
4434         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4435                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4436
4437         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4438                  be_max_txqs(adapter), be_max_rxqs(adapter),
4439                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4440                  be_max_vfs(adapter));
4441         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4442                  be_max_uc(adapter), be_max_mc(adapter),
4443                  be_max_vlans(adapter));
4444
4445         /* Ensure RX and TX queues are created in pairs at init time */
4446         adapter->cfg_num_rx_irqs =
4447                                 min_t(u16, netif_get_num_default_rss_queues(),
4448                                       be_max_qp_irqs(adapter));
4449         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4450         return 0;
4451 }
4452
4453 static int be_get_config(struct be_adapter *adapter)
4454 {
4455         int status, level;
4456         u16 profile_id;
4457
4458         status = be_cmd_get_cntl_attributes(adapter);
4459         if (status)
4460                 return status;
4461
4462         status = be_cmd_query_fw_cfg(adapter);
4463         if (status)
4464                 return status;
4465
4466         if (!lancer_chip(adapter) && be_physfn(adapter))
4467                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4468
4469         if (BEx_chip(adapter)) {
4470                 level = be_cmd_get_fw_log_level(adapter);
4471                 adapter->msg_enable =
4472                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4473         }
4474
4475         be_cmd_get_acpi_wol_cap(adapter);
4476         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4477         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4478
4479         be_cmd_query_port_name(adapter);
4480
4481         if (be_physfn(adapter)) {
4482                 status = be_cmd_get_active_profile(adapter, &profile_id);
4483                 if (!status)
4484                         dev_info(&adapter->pdev->dev,
4485                                  "Using profile 0x%x\n", profile_id);
4486         }
4487
4488         return 0;
4489 }
4490
4491 static int be_mac_setup(struct be_adapter *adapter)
4492 {
4493         u8 mac[ETH_ALEN];
4494         int status;
4495
4496         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4497                 status = be_cmd_get_perm_mac(adapter, mac);
4498                 if (status)
4499                         return status;
4500
4501                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4502                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4503         }
4504
4505         return 0;
4506 }
4507
4508 static void be_schedule_worker(struct be_adapter *adapter)
4509 {
4510         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4511         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4512 }
4513
4514 static void be_destroy_err_recovery_workq(void)
4515 {
4516         if (!be_err_recovery_workq)
4517                 return;
4518
4519         flush_workqueue(be_err_recovery_workq);
4520         destroy_workqueue(be_err_recovery_workq);
4521         be_err_recovery_workq = NULL;
4522 }
4523
4524 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4525 {
4526         struct be_error_recovery *err_rec = &adapter->error_recovery;
4527
4528         if (!be_err_recovery_workq)
4529                 return;
4530
4531         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4532                            msecs_to_jiffies(delay));
4533         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4534 }
4535
4536 static int be_setup_queues(struct be_adapter *adapter)
4537 {
4538         struct net_device *netdev = adapter->netdev;
4539         int status;
4540
4541         status = be_evt_queues_create(adapter);
4542         if (status)
4543                 goto err;
4544
4545         status = be_tx_qs_create(adapter);
4546         if (status)
4547                 goto err;
4548
4549         status = be_rx_cqs_create(adapter);
4550         if (status)
4551                 goto err;
4552
4553         status = be_mcc_queues_create(adapter);
4554         if (status)
4555                 goto err;
4556
4557         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4558         if (status)
4559                 goto err;
4560
4561         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4562         if (status)
4563                 goto err;
4564
4565         return 0;
4566 err:
4567         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4568         return status;
4569 }
4570
4571 static int be_if_create(struct be_adapter *adapter)
4572 {
4573         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4574         u32 cap_flags = be_if_cap_flags(adapter);
4575         int status;
4576
4577         /* alloc required memory for other filtering fields */
4578         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4579                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4580         if (!adapter->pmac_id)
4581                 return -ENOMEM;
4582
4583         adapter->mc_list = kcalloc(be_max_mc(adapter),
4584                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4585         if (!adapter->mc_list)
4586                 return -ENOMEM;
4587
4588         adapter->uc_list = kcalloc(be_max_uc(adapter),
4589                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4590         if (!adapter->uc_list)
4591                 return -ENOMEM;
4592
4593         if (adapter->cfg_num_rx_irqs == 1)
4594                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4595
4596         en_flags &= cap_flags;
4597         /* will enable all the needed filter flags in be_open() */
4598         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4599                                   &adapter->if_handle, 0);
4600
4601         if (status)
4602                 return status;
4603
4604         return 0;
4605 }
4606
4607 int be_update_queues(struct be_adapter *adapter)
4608 {
4609         struct net_device *netdev = adapter->netdev;
4610         int status;
4611
4612         if (netif_running(netdev))
4613                 be_close(netdev);
4614
4615         be_cancel_worker(adapter);
4616
4617         /* If any vectors have been shared with RoCE we cannot re-program
4618          * the MSIx table.
4619          */
4620         if (!adapter->num_msix_roce_vec)
4621                 be_msix_disable(adapter);
4622
4623         be_clear_queues(adapter);
4624         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4625         if (status)
4626                 return status;
4627
4628         if (!msix_enabled(adapter)) {
4629                 status = be_msix_enable(adapter);
4630                 if (status)
4631                         return status;
4632         }
4633
4634         status = be_if_create(adapter);
4635         if (status)
4636                 return status;
4637
4638         status = be_setup_queues(adapter);
4639         if (status)
4640                 return status;
4641
4642         be_schedule_worker(adapter);
4643
4644         if (netif_running(netdev))
4645                 status = be_open(netdev);
4646
4647         return status;
4648 }
4649
4650 static inline int fw_major_num(const char *fw_ver)
4651 {
4652         int fw_major = 0, i;
4653
4654         i = sscanf(fw_ver, "%d.", &fw_major);
4655         if (i != 1)
4656                 return 0;
4657
4658         return fw_major;
4659 }
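
/* Example: for a version string like "4.2.220.0" (illustrative), sscanf()
 * parses up to the first '.', so fw_major_num() returns 4.
 */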
4660
4661 /* If we are in error recovery, FLR the PF.
4662  * Otherwise, don't FLR the PF if any VFs are already enabled.
4663  */
4664 static bool be_reset_required(struct be_adapter *adapter)
4665 {
4666         if (be_error_recovering(adapter))
4667                 return true;
4668         else
4669                 return pci_num_vf(adapter->pdev) == 0;
4670 }
4671
4672 /* Wait for the FW to be ready and perform the required initialization */
4673 static int be_func_init(struct be_adapter *adapter)
4674 {
4675         int status;
4676
4677         status = be_fw_wait_ready(adapter);
4678         if (status)
4679                 return status;
4680
4681         /* FW is now ready; clear errors to allow cmds/doorbell */
4682         be_clear_error(adapter, BE_CLEAR_ALL);
4683
4684         if (be_reset_required(adapter)) {
4685                 status = be_cmd_reset_function(adapter);
4686                 if (status)
4687                         return status;
4688
4689                 /* Wait for interrupts to quiesce after an FLR */
4690                 msleep(100);
4691         }
4692
4693         /* Tell FW we're ready to fire cmds */
4694         status = be_cmd_fw_init(adapter);
4695         if (status)
4696                 return status;
4697
4698         /* Allow interrupts for other ULPs running on NIC function */
4699         be_intr_set(adapter, true);
4700
4701         return 0;
4702 }
4703
4704 static int be_setup(struct be_adapter *adapter)
4705 {
4706         struct device *dev = &adapter->pdev->dev;
4707         int status;
4708
4709         status = be_func_init(adapter);
4710         if (status)
4711                 return status;
4712
4713         be_setup_init(adapter);
4714
4715         if (!lancer_chip(adapter))
4716                 be_cmd_req_native_mode(adapter);
4717
4718         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4719          * for issuing profile-related cmds
4720          */
4721         if (!BEx_chip(adapter)) {
4722                 status = be_cmd_get_func_config(adapter, NULL);
4723                 if (status)
4724                         return status;
4725         }
4726
4727         status = be_get_config(adapter);
4728         if (status)
4729                 goto err;
4730
4731         if (!BE2_chip(adapter) && be_physfn(adapter))
4732                 be_alloc_sriov_res(adapter);
4733
4734         status = be_get_resources(adapter);
4735         if (status)
4736                 goto err;
4737
4738         status = be_msix_enable(adapter);
4739         if (status)
4740                 goto err;
4741
4742         /* will enable all the needed filter flags in be_open() */
4743         status = be_if_create(adapter);
4744         if (status)
4745                 goto err;
4746
4747         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4748         rtnl_lock();
4749         status = be_setup_queues(adapter);
4750         rtnl_unlock();
4751         if (status)
4752                 goto err;
4753
4754         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4755
4756         status = be_mac_setup(adapter);
4757         if (status)
4758                 goto err;
4759
4760         be_cmd_get_fw_ver(adapter);
4761         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4762
4763         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4764                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4765                         adapter->fw_ver);
4766                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4767         }
4768
4769         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4770                                          adapter->rx_fc);
4771         if (status)
4772                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4773                                         &adapter->rx_fc);
4774
4775         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4776                  adapter->tx_fc, adapter->rx_fc);
4777
4778         if (be_physfn(adapter))
4779                 be_cmd_set_logical_link_config(adapter,
4780                                                IFLA_VF_LINK_STATE_AUTO, 0);
4781
4782         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4783          * vport, confusing any Linux bridge or OVS it might be connected to.
4784          * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4785          * when SRIOV is not enabled.
4786          */
4787         if (BE3_chip(adapter))
4788                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4789                                       PORT_FWD_TYPE_PASSTHRU, 0);
4790
4791         if (adapter->num_vfs)
4792                 be_vf_setup(adapter);
4793
4794         status = be_cmd_get_phy_info(adapter);
4795         if (!status && be_pause_supported(adapter))
4796                 adapter->phy.fc_autoneg = 1;
4797
4798         if (be_physfn(adapter) && !lancer_chip(adapter))
4799                 be_cmd_set_features(adapter);
4800
4801         be_schedule_worker(adapter);
4802         adapter->flags |= BE_FLAGS_SETUP_DONE;
4803         return 0;
4804 err:
4805         be_clear(adapter);
4806         return status;
4807 }
4808
4809 #ifdef CONFIG_NET_POLL_CONTROLLER
4810 static void be_netpoll(struct net_device *netdev)
4811 {
4812         struct be_adapter *adapter = netdev_priv(netdev);
4813         struct be_eq_obj *eqo;
4814         int i;
4815
4816         for_all_evt_queues(adapter, eqo, i) {
4817                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4818                 napi_schedule(&eqo->napi);
4819         }
4820 }
4821 #endif
4822
4823 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4824 {
4825         const struct firmware *fw;
4826         int status;
4827
4828         if (!netif_running(adapter->netdev)) {
4829                 dev_err(&adapter->pdev->dev,
4830                         "Firmware load not allowed (interface is down)\n");
4831                 return -ENETDOWN;
4832         }
4833
4834         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4835         if (status)
4836                 goto fw_exit;
4837
4838         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4839
4840         if (lancer_chip(adapter))
4841                 status = lancer_fw_download(adapter, fw);
4842         else
4843                 status = be_fw_download(adapter, fw);
4844
4845         if (!status)
4846                 be_cmd_get_fw_ver(adapter);
4847
4848 fw_exit:
4849         release_firmware(fw);
4850         return status;
4851 }
4852
4853 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4854                                  u16 flags)
4855 {
4856         struct be_adapter *adapter = netdev_priv(dev);
4857         struct nlattr *attr, *br_spec;
4858         int rem;
4859         int status = 0;
4860         u16 mode = 0;
4861
4862         if (!sriov_enabled(adapter))
4863                 return -EOPNOTSUPP;
4864
4865         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4866         if (!br_spec)
4867                 return -EINVAL;
4868
4869         nla_for_each_nested(attr, br_spec, rem) {
4870                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4871                         continue;
4872
4873                 if (nla_len(attr) < sizeof(mode))
4874                         return -EINVAL;
4875
4876                 mode = nla_get_u16(attr);
4877                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4878                         return -EOPNOTSUPP;
4879
4880                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4881                         return -EINVAL;
4882
4883                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4884                                                adapter->if_handle,
4885                                                mode == BRIDGE_MODE_VEPA ?
4886                                                PORT_FWD_TYPE_VEPA :
4887                                                PORT_FWD_TYPE_VEB, 0);
4888                 if (status)
4889                         goto err;
4890
4891                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4892                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4893
4894                 return status;
4895         }
4896 err:
4897         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4898                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4899
4900         return status;
4901 }
4902
4903 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4904                                  struct net_device *dev, u32 filter_mask,
4905                                  int nlflags)
4906 {
4907         struct be_adapter *adapter = netdev_priv(dev);
4908         int status = 0;
4909         u8 hsw_mode;
4910
4911         /* BE and Lancer chips support VEB mode only */
4912         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4913                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4914                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4915                         return 0;
4916                 hsw_mode = PORT_FWD_TYPE_VEB;
4917         } else {
4918                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4919                                                adapter->if_handle, &hsw_mode,
4920                                                NULL);
4921                 if (status)
4922                         return 0;
4923
4924                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4925                         return 0;
4926         }
4927
4928         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4929                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4930                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4931                                        0, 0, nlflags, filter_mask, NULL);
4932 }
4933
4934 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4935                                          void (*func)(struct work_struct *))
4936 {
4937         struct be_cmd_work *work;
4938
4939         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4940         if (!work) {
4941                 dev_err(&adapter->pdev->dev,
4942                         "be_work memory allocation failed\n");
4943                 return NULL;
4944         }
4945
4946         INIT_WORK(&work->work, func);
4947         work->adapter = adapter;
4948         return work;
4949 }
4950
4951 /* VxLAN offload Notes:
4952  *
4953  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4954  * distinguish various types of transports (VxLAN, GRE, NVGRE, etc.). So, offload
4955  * is expected to work across all types of IP tunnels once exported. Skyhawk
4956  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4957  * offloads in hw_enc_features only when a VxLAN port is added. If other
4958  * (non-VxLAN) tunnels are configured while VxLAN offloads are enabled, those
4959  * tunnels' offloads are unexported on the fly through ndo_features_check().
4960  *
4961  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4962  * adds more than one port, disable offloads and don't re-enable them
4963  * until after all the tunnels are removed.
4964  */
4965 static void be_work_add_vxlan_port(struct work_struct *work)
4966 {
4967         struct be_cmd_work *cmd_work =
4968                                 container_of(work, struct be_cmd_work, work);
4969         struct be_adapter *adapter = cmd_work->adapter;
4970         struct net_device *netdev = adapter->netdev;
4971         struct device *dev = &adapter->pdev->dev;
4972         __be16 port = cmd_work->info.vxlan_port;
4973         int status;
4974
4975         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4976                 adapter->vxlan_port_aliases++;
4977                 goto done;
4978         }
4979
4980         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4981                 dev_info(dev,
4982                          "Only one UDP port supported for VxLAN offloads\n");
4983                 dev_info(dev, "Disabling VxLAN offloads\n");
4984                 adapter->vxlan_port_count++;
4985                 goto err;
4986         }
4987
4988         if (adapter->vxlan_port_count++ >= 1)
4989                 goto done;
4990
4991         status = be_cmd_manage_iface(adapter, adapter->if_handle,
4992                                      OP_CONVERT_NORMAL_TO_TUNNEL);
4993         if (status) {
4994                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4995                 goto err;
4996         }
4997
4998         status = be_cmd_set_vxlan_port(adapter, port);
4999         if (status) {
5000                 dev_warn(dev, "Failed to add VxLAN port\n");
5001                 goto err;
5002         }
5003         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5004         adapter->vxlan_port = port;
5005
5006         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5007                                    NETIF_F_TSO | NETIF_F_TSO6 |
5008                                    NETIF_F_GSO_UDP_TUNNEL;
5009         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5010         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5011
5012         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5013                  be16_to_cpu(port));
5014         goto done;
5015 err:
5016         be_disable_vxlan_offloads(adapter);
5017 done:
5018         kfree(cmd_work);
5019 }
5020
5021 static void be_work_del_vxlan_port(struct work_struct *work)
5022 {
5023         struct be_cmd_work *cmd_work =
5024                                 container_of(work, struct be_cmd_work, work);
5025         struct be_adapter *adapter = cmd_work->adapter;
5026         __be16 port = cmd_work->info.vxlan_port;
5027
5028         if (adapter->vxlan_port != port)
5029                 goto done;
5030
5031         if (adapter->vxlan_port_aliases) {
5032                 adapter->vxlan_port_aliases--;
5033                 goto out;
5034         }
5035
5036         be_disable_vxlan_offloads(adapter);
5037
5038         dev_info(&adapter->pdev->dev,
5039                  "Disabled VxLAN offloads for UDP port %d\n",
5040                  be16_to_cpu(port));
5041 done:
5042         adapter->vxlan_port_count--;
5043 out:
5044         kfree(cmd_work);
5045 }
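
/* Bookkeeping summary for the two work handlers above: vxlan_port_count
 * tracks every UDP port the stack has tried to add, while vxlan_port_aliases
 * counts repeat adds of the single port that is actually offloaded. Offloads
 * are only (re-)enabled when a port is added while the count is zero.
 */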
5046
5047 static void be_cfg_vxlan_port(struct net_device *netdev,
5048                               struct udp_tunnel_info *ti,
5049                               void (*func)(struct work_struct *))
5050 {
5051         struct be_adapter *adapter = netdev_priv(netdev);
5052         struct be_cmd_work *cmd_work;
5053
5054         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5055                 return;
5056
5057         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5058                 return;
5059
5060         cmd_work = be_alloc_work(adapter, func);
5061         if (cmd_work) {
5062                 cmd_work->info.vxlan_port = ti->port;
5063                 queue_work(be_wq, &cmd_work->work);
5064         }
5065 }
5066
5067 static void be_del_vxlan_port(struct net_device *netdev,
5068                               struct udp_tunnel_info *ti)
5069 {
5070         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5071 }
5072
5073 static void be_add_vxlan_port(struct net_device *netdev,
5074                               struct udp_tunnel_info *ti)
5075 {
5076         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5077 }
5078
5079 static netdev_features_t be_features_check(struct sk_buff *skb,
5080                                            struct net_device *dev,
5081                                            netdev_features_t features)
5082 {
5083         struct be_adapter *adapter = netdev_priv(dev);
5084         u8 l4_hdr = 0;
5085
5086         /* The code below restricts offload features for some tunneled packets.
5087          * Offload features for normal (non tunnel) packets are unchanged.
5088          */
5089         if (!skb->encapsulation ||
5090             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5091                 return features;
5092
5093         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5094          * should disable tunnel offload features if it's not a VxLAN packet,
5095          * as tunnel offloads have been enabled only for VxLAN. This is done to
5096          * allow other tunneled traffic like GRE work fine while VxLAN
5097          * offloads are configured in Skyhawk-R.
5098          */
5099         switch (vlan_get_protocol(skb)) {
5100         case htons(ETH_P_IP):
5101                 l4_hdr = ip_hdr(skb)->protocol;
5102                 break;
5103         case htons(ETH_P_IPV6):
5104                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5105                 break;
5106         default:
5107                 return features;
5108         }
5109
5110         if (l4_hdr != IPPROTO_UDP ||
5111             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5112             skb->inner_protocol != htons(ETH_P_TEB) ||
5113             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5114             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
5115                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5116
5117         return features;
5118 }
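
/* The offset check above expects the inner Ethernet header to start exactly
 * 16 bytes past the outer L4 header: an 8-byte UDP header followed by an
 * 8-byte VxLAN header.
 */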
5119
5120 static int be_get_phys_port_id(struct net_device *dev,
5121                                struct netdev_phys_item_id *ppid)
5122 {
5123         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5124         struct be_adapter *adapter = netdev_priv(dev);
5125         u8 *id;
5126
5127         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5128                 return -ENOSPC;
5129
5130         ppid->id[0] = adapter->hba_port_num + 1;
5131         id = &ppid->id[1];
5132         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5133              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5134                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5135
5136         ppid->id_len = id_len;
5137
5138         return 0;
5139 }
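
/* Resulting ppid layout, as built above: byte 0 carries the 1-based HBA port
 * number and the remaining bytes hold the controller serial-number words,
 * copied in reverse word order.
 */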
5140
5141 static void be_set_rx_mode(struct net_device *dev)
5142 {
5143         struct be_adapter *adapter = netdev_priv(dev);
5144         struct be_cmd_work *work;
5145
5146         work = be_alloc_work(adapter, be_work_set_rx_mode);
5147         if (work)
5148                 queue_work(be_wq, &work->work);
5149 }
5150
5151 static const struct net_device_ops be_netdev_ops = {
5152         .ndo_open               = be_open,
5153         .ndo_stop               = be_close,
5154         .ndo_start_xmit         = be_xmit,
5155         .ndo_set_rx_mode        = be_set_rx_mode,
5156         .ndo_set_mac_address    = be_mac_addr_set,
5157         .ndo_change_mtu         = be_change_mtu,
5158         .ndo_get_stats64        = be_get_stats64,
5159         .ndo_validate_addr      = eth_validate_addr,
5160         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5161         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5162         .ndo_set_vf_mac         = be_set_vf_mac,
5163         .ndo_set_vf_vlan        = be_set_vf_vlan,
5164         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5165         .ndo_get_vf_config      = be_get_vf_config,
5166         .ndo_set_vf_link_state  = be_set_vf_link_state,
5167         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5168 #ifdef CONFIG_NET_POLL_CONTROLLER
5169         .ndo_poll_controller    = be_netpoll,
5170 #endif
5171         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5172         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5173 #ifdef CONFIG_NET_RX_BUSY_POLL
5174         .ndo_busy_poll          = be_busy_poll,
5175 #endif
5176         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5177         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5178         .ndo_features_check     = be_features_check,
5179         .ndo_get_phys_port_id   = be_get_phys_port_id,
5180 };
5181
5182 static void be_netdev_init(struct net_device *netdev)
5183 {
5184         struct be_adapter *adapter = netdev_priv(netdev);
5185
5186         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5187                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5188                 NETIF_F_HW_VLAN_CTAG_TX;
5189         if (be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)
5190                 netdev->hw_features |= NETIF_F_RXHASH;
5191
5192         netdev->features |= netdev->hw_features |
5193                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5194
5195         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5196                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5197
5198         netdev->priv_flags |= IFF_UNICAST_FLT;
5199
5200         netdev->flags |= IFF_MULTICAST;
5201
5202         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5203
5204         netdev->netdev_ops = &be_netdev_ops;
5205
5206         netdev->ethtool_ops = &be_ethtool_ops;
5207 }
5208
5209 static void be_cleanup(struct be_adapter *adapter)
5210 {
5211         struct net_device *netdev = adapter->netdev;
5212
5213         rtnl_lock();
5214         netif_device_detach(netdev);
5215         if (netif_running(netdev))
5216                 be_close(netdev);
5217         rtnl_unlock();
5218
5219         be_clear(adapter);
5220 }
5221
5222 static int be_resume(struct be_adapter *adapter)
5223 {
5224         struct net_device *netdev = adapter->netdev;
5225         int status;
5226
5227         status = be_setup(adapter);
5228         if (status)
5229                 return status;
5230
5231         rtnl_lock();
5232         if (netif_running(netdev))
5233                 status = be_open(netdev);
5234         rtnl_unlock();
5235
5236         if (status)
5237                 return status;
5238
5239         netif_device_attach(netdev);
5240
5241         return 0;
5242 }
5243
5244 static void be_soft_reset(struct be_adapter *adapter)
5245 {
5246         u32 val;
5247
5248         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5249         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5250         val |= SLIPORT_SOFTRESET_SR_MASK;
5251         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5252 }
5253
5254 static bool be_err_is_recoverable(struct be_adapter *adapter)
5255 {
5256         struct be_error_recovery *err_rec = &adapter->error_recovery;
5257         unsigned long initial_idle_time =
5258                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5259         unsigned long recovery_interval =
5260                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5261         u16 ue_err_code;
5262         u32 val;
5263
5264         val = be_POST_stage_get(adapter);
5265         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5266                 return false;
5267         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5268         if (ue_err_code == 0)
5269                 return false;
5270
5271         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5272                 ue_err_code);
5273
5274         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5275                 dev_err(&adapter->pdev->dev,
5276                         "Cannot recover within %lu sec from driver load\n",
5277                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5278                 return false;
5279         }
5280
5281         if (err_rec->last_recovery_time &&
5282             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5283                 dev_err(&adapter->pdev->dev,
5284                         "Cannot recover within %lu sec from last recovery\n",
5285                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5286                 return false;
5287         }
5288
5289         if (ue_err_code == err_rec->last_err_code) {
5290                 dev_err(&adapter->pdev->dev,
5291                         "Cannot recover from a consecutive TPE error\n");
5292                 return false;
5293         }
5294
5295         err_rec->last_recovery_time = jiffies;
5296         err_rec->last_err_code = ue_err_code;
5297         return true;
5298 }
5299
5300 static int be_tpe_recover(struct be_adapter *adapter)
5301 {
5302         struct be_error_recovery *err_rec = &adapter->error_recovery;
5303         int status = -EAGAIN;
5304         u32 val;
5305
5306         switch (err_rec->recovery_state) {
5307         case ERR_RECOVERY_ST_NONE:
5308                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5309                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5310                 break;
5311
5312         case ERR_RECOVERY_ST_DETECT:
5313                 val = be_POST_stage_get(adapter);
5314                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5315                     POST_STAGE_RECOVERABLE_ERR) {
5316                         dev_err(&adapter->pdev->dev,
5317                                 "Unrecoverable HW error detected: 0x%x\n", val);
5318                         status = -EINVAL;
5319                         err_rec->resched_delay = 0;
5320                         break;
5321                 }
5322
5323                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5324
5325                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5326                  * milliseconds before it checks for final error status in
5327                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are
5328                  * met. If they are, PF0 initiates a Soft Reset.
5329                  */
5330                 if (adapter->pf_num == 0) {
5331                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5332                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5333                                         ERR_RECOVERY_UE_DETECT_DURATION;
5334                         break;
5335                 }
5336
5337                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5338                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5339                                         ERR_RECOVERY_UE_DETECT_DURATION;
5340                 break;
5341
5342         case ERR_RECOVERY_ST_RESET:
5343                 if (!be_err_is_recoverable(adapter)) {
5344                         dev_err(&adapter->pdev->dev,
5345                                 "Failed to meet recovery criteria\n");
5346                         status = -EIO;
5347                         err_rec->resched_delay = 0;
5348                         break;
5349                 }
5350                 be_soft_reset(adapter);
5351                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5352                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5353                                         err_rec->ue_to_reset_time;
5354                 break;
5355
5356         case ERR_RECOVERY_ST_PRE_POLL:
5357                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5358                 err_rec->resched_delay = 0;
5359                 status = 0;                     /* done */
5360                 break;
5361
5362         default:
5363                 status = -EINVAL;
5364                 err_rec->resched_delay = 0;
5365                 break;
5366         }
5367
5368         return status;
5369 }
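
/* Summary of the recovery state machine driven above:
 *   NONE -> DETECT (wait out the UE detection window)
 *   DETECT -> RESET -> PRE_POLL -> REINIT   (PF0 only; PF0 issues the reset)
 *   DETECT -> PRE_POLL -> REINIT            (all other PFs)
 * A non-recoverable POST stage or unmet recovery criteria abort the sequence
 * with a negative status.
 */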
5370
5371 static int be_err_recover(struct be_adapter *adapter)
5372 {
5373         int status;
5374
5375         if (!lancer_chip(adapter)) {
5376                 if (!adapter->error_recovery.recovery_supported ||
5377                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5378                         return -EIO;
5379                 status = be_tpe_recover(adapter);
5380                 if (status)
5381                         goto err;
5382         }
5383
5384         /* Wait for adapter to reach quiescent state before
5385          * destroying queues
5386          */
5387         status = be_fw_wait_ready(adapter);
5388         if (status)
5389                 goto err;
5390
5391         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5392
5393         be_cleanup(adapter);
5394
5395         status = be_resume(adapter);
5396         if (status)
5397                 goto err;
5398
5399         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5400
5401 err:
5402         return status;
5403 }
5404
5405 static void be_err_detection_task(struct work_struct *work)
5406 {
5407         struct be_error_recovery *err_rec =
5408                         container_of(work, struct be_error_recovery,
5409                                      err_detection_work.work);
5410         struct be_adapter *adapter =
5411                         container_of(err_rec, struct be_adapter,
5412                                      error_recovery);
5413         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5414         struct device *dev = &adapter->pdev->dev;
5415         int recovery_status;
5416
5417         be_detect_error(adapter);
5418         if (!be_check_error(adapter, BE_ERROR_HW))
5419                 goto reschedule_task;
5420
5421         recovery_status = be_err_recover(adapter);
5422         if (!recovery_status) {
5423                 err_rec->recovery_retries = 0;
5424                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5425                 dev_info(dev, "Adapter recovery successful\n");
5426                 goto reschedule_task;
5427         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5428                 /* BEx/SH recovery state machine */
5429                 if (adapter->pf_num == 0 &&
5430                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5431                         dev_err(dev, "Adapter recovery in progress\n");
5433                 resched_delay = err_rec->resched_delay;
5434                 goto reschedule_task;
5435         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5436                 /* For VFs, check every second whether the PF has
5437                  * allocated resources.
5438                  */
5439                 dev_err(dev, "Re-trying adapter recovery\n");
5440                 goto reschedule_task;
5441         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5442                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5443                 /* In case of another error during recovery, it takes 30 sec
5444                  * for the adapter to come out of error. Retry error recovery
5445                  * after this time interval.
5446                  */
5447                 dev_err(dev, "Re-trying adapter recovery\n");
5448                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5449                 goto reschedule_task;
5450         } else {
5451                 dev_err(dev, "Adapter recovery failed\n");
5452                 dev_err(dev, "Please reboot server to recover\n");
5453         }
5454
5455         return;
5456
5457 reschedule_task:
5458         be_schedule_err_detection(adapter, resched_delay);
5459 }
5460
5461 static void be_log_sfp_info(struct be_adapter *adapter)
5462 {
5463         int status;
5464
5465         status = be_cmd_query_sfp_info(adapter);
5466         if (!status) {
5467                 dev_err(&adapter->pdev->dev,
5468                         "Port %c: %s Vendor: %s part no: %s\n",
5469                         adapter->port_name,
5470                         be_misconfig_evt_port_state[adapter->phy_state],
5471                         adapter->phy.vendor_name,
5472                         adapter->phy.vendor_pn);
5473         }
5474         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5475 }
5476
5477 static void be_worker(struct work_struct *work)
5478 {
5479         struct be_adapter *adapter =
5480                 container_of(work, struct be_adapter, work.work);
5481         struct be_rx_obj *rxo;
5482         int i;
5483
5484         if (be_physfn(adapter) &&
5485             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5486                 be_cmd_get_die_temperature(adapter);
5487
5488         /* When interrupts are not yet enabled, just reap any pending
5489          * MCC completions
5490          */
5491         if (!netif_running(adapter->netdev)) {
5492                 local_bh_disable();
5493                 be_process_mcc(adapter);
5494                 local_bh_enable();
5495                 goto reschedule;
5496         }
5497
5498         if (!adapter->stats_cmd_sent) {
5499                 if (lancer_chip(adapter))
5500                         lancer_cmd_get_pport_stats(adapter,
5501                                                    &adapter->stats_cmd);
5502                 else
5503                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5504         }
5505
5506         for_all_rx_queues(adapter, rxo, i) {
5507                 /* Replenish RX-queues starved due to memory
5508                  * allocation failures.
5509                  */
5510                 if (rxo->rx_post_starved)
5511                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5512         }
5513
5514         /* EQ-delay update for Skyhawk is done while notifying EQ */
5515         if (!skyhawk_chip(adapter))
5516                 be_eqd_update(adapter, false);
5517
5518         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5519                 be_log_sfp_info(adapter);
5520
5521 reschedule:
5522         adapter->work_counter++;
5523         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5524 }
5525
5526 static void be_unmap_pci_bars(struct be_adapter *adapter)
5527 {
5528         if (adapter->csr)
5529                 pci_iounmap(adapter->pdev, adapter->csr);
5530         if (adapter->db)
5531                 pci_iounmap(adapter->pdev, adapter->db);
5532         if (adapter->pcicfg && adapter->pcicfg_mapped)
5533                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5534 }
5535
5536 static int db_bar(struct be_adapter *adapter)
5537 {
5538         if (lancer_chip(adapter) || be_virtfn(adapter))
5539                 return 0;
5540         else
5541                 return 4;
5542 }
5543
5544 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5545 {
5546         if (skyhawk_chip(adapter)) {
5547                 adapter->roce_db.size = 4096;
5548                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5549                                                               db_bar(adapter));
5550                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5551                                                                db_bar(adapter));
5552         }
5553         return 0;
5554 }
5555
5556 static int be_map_pci_bars(struct be_adapter *adapter)
5557 {
5558         struct pci_dev *pdev = adapter->pdev;
5559         u8 __iomem *addr;
5560         u32 sli_intf;
5561
5562         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5563         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5564                                 SLI_INTF_FAMILY_SHIFT;
5565         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5566
5567         if (BEx_chip(adapter) && be_physfn(adapter)) {
5568                 adapter->csr = pci_iomap(pdev, 2, 0);
5569                 if (!adapter->csr)
5570                         return -ENOMEM;
5571         }
5572
5573         addr = pci_iomap(pdev, db_bar(adapter), 0);
5574         if (!addr)
5575                 goto pci_map_err;
5576         adapter->db = addr;
5577
5578         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5579                 if (be_physfn(adapter)) {
5580                         /* PCICFG is the 2nd BAR in BE2 */
5581                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5582                         if (!addr)
5583                                 goto pci_map_err;
5584                         adapter->pcicfg = addr;
5585                         adapter->pcicfg_mapped = true;
5586                 } else {
5587                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5588                         adapter->pcicfg_mapped = false;
5589                 }
5590         }
5591
5592         be_roce_map_pci_bars(adapter);
5593         return 0;
5594
5595 pci_map_err:
5596         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5597         be_unmap_pci_bars(adapter);
5598         return -ENOMEM;
5599 }
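
/* BAR usage as mapped above: CSR is BAR 2 (BEx PFs only); the doorbell BAR
 * is BAR 0 on Lancer and on VFs, else BAR 4; PCICFG is BAR 1 on BE2 and
 * BAR 0 on BE3/Skyhawk PFs, while VFs reach it at an offset within the
 * doorbell BAR.
 */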
5600
5601 static void be_drv_cleanup(struct be_adapter *adapter)
5602 {
5603         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5604         struct device *dev = &adapter->pdev->dev;
5605
5606         if (mem->va)
5607                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5608
5609         mem = &adapter->rx_filter;
5610         if (mem->va)
5611                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5612
5613         mem = &adapter->stats_cmd;
5614         if (mem->va)
5615                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5616 }
5617
5618 /* Allocate and initialize various fields in be_adapter struct */
5619 static int be_drv_init(struct be_adapter *adapter)
5620 {
5621         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5622         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5623         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5624         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5625         struct device *dev = &adapter->pdev->dev;
5626         int status = 0;
5627
5628         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5629         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5630                                                  &mbox_mem_alloc->dma,
5631                                                  GFP_KERNEL);
5632         if (!mbox_mem_alloc->va)
5633                 return -ENOMEM;
5634
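        /* The mailbox must be 16-byte aligned: the allocation above is padded
         * by 16 bytes, and both the CPU and DMA addresses are rounded up to
         * the next 16-byte boundary here.
         */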
5635         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5636         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5637         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5638
5639         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5640         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5641                                             &rx_filter->dma, GFP_KERNEL);
5642         if (!rx_filter->va) {
5643                 status = -ENOMEM;
5644                 goto free_mbox;
5645         }
5646
5647         if (lancer_chip(adapter))
5648                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5649         else if (BE2_chip(adapter))
5650                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5651         else if (BE3_chip(adapter))
5652                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5653         else
5654                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5655         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5656                                             &stats_cmd->dma, GFP_KERNEL);
5657         if (!stats_cmd->va) {
5658                 status = -ENOMEM;
5659                 goto free_rx_filter;
5660         }
5661
5662         mutex_init(&adapter->mbox_lock);
5663         mutex_init(&adapter->mcc_lock);
5664         mutex_init(&adapter->rx_filter_lock);
5665         spin_lock_init(&adapter->mcc_cq_lock);
5666         init_completion(&adapter->et_cmd_compl);
5667
5668         pci_save_state(adapter->pdev);
5669
5670         INIT_DELAYED_WORK(&adapter->work, be_worker);
5671
5672         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5673         adapter->error_recovery.resched_delay = 0;
5674         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5675                           be_err_detection_task);
5676
5677         adapter->rx_fc = true;
5678         adapter->tx_fc = true;
5679
5680         /* Must be a power of 2 or else MODULO will BUG_ON */
5681         adapter->be_get_temp_freq = 64;
5682
5683         return 0;
5684
5685 free_rx_filter:
5686         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5687 free_mbox:
5688         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5689                           mbox_mem_alloc->dma);
5690         return status;
5691 }
5692
5693 static void be_remove(struct pci_dev *pdev)
5694 {
5695         struct be_adapter *adapter = pci_get_drvdata(pdev);
5696
5697         if (!adapter)
5698                 return;
5699
5700         be_roce_dev_remove(adapter);
5701         be_intr_set(adapter, false);
5702
5703         be_cancel_err_detection(adapter);
5704
5705         unregister_netdev(adapter->netdev);
5706
5707         be_clear(adapter);
5708
5709         /* tell fw we're done with firing cmds */
5710         be_cmd_fw_clean(adapter);
5711
5712         be_unmap_pci_bars(adapter);
5713         be_drv_cleanup(adapter);
5714
5715         pci_disable_pcie_error_reporting(pdev);
5716
5717         pci_release_regions(pdev);
5718         pci_disable_device(pdev);
5719
5720         free_netdev(adapter->netdev);
5721 }
5722
5723 static ssize_t be_hwmon_show_temp(struct device *dev,
5724                                   struct device_attribute *dev_attr,
5725                                   char *buf)
5726 {
5727         struct be_adapter *adapter = dev_get_drvdata(dev);
5728
5729         /* Unit: millidegree Celsius */
5730         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5731                 return -EIO;
5732         else
5733                 return sprintf(buf, "%u\n",
5734                                adapter->hwmon_info.be_on_die_temp * 1000);
5735 }
5736
5737 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5738                           be_hwmon_show_temp, NULL, 1);
5739
5740 static struct attribute *be_hwmon_attrs[] = {
5741         &sensor_dev_attr_temp1_input.dev_attr.attr,
5742         NULL
5743 };
5744
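/* ATTRIBUTE_GROUPS(be_hwmon) expands to the be_hwmon_groups array that
 * devm_hwmon_device_register_with_groups() consumes in be_probe() below.
 */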
5745 ATTRIBUTE_GROUPS(be_hwmon);
5746
5747 static char *mc_name(struct be_adapter *adapter)
5748 {
5749         char *str = ""; /* default */
5750
5751         switch (adapter->mc_type) {
5752         case UMC:
5753                 str = "UMC";
5754                 break;
5755         case FLEX10:
5756                 str = "FLEX10";
5757                 break;
5758         case vNIC1:
5759                 str = "vNIC-1";
5760                 break;
5761         case nPAR:
5762                 str = "nPAR";
5763                 break;
5764         case UFP:
5765                 str = "UFP";
5766                 break;
5767         case vNIC2:
5768                 str = "vNIC-2";
5769                 break;
5770         default:
5771                 break;
5772         }
5773
5774         return str;
5775 }
5776
5777 static inline char *func_name(struct be_adapter *adapter)
5778 {
5779         return be_physfn(adapter) ? "PF" : "VF";
5780 }
5781
5782 static inline char *nic_name(struct pci_dev *pdev)
5783 {
5784         switch (pdev->device) {
5785         case OC_DEVICE_ID1:
5786                 return OC_NAME;
5787         case OC_DEVICE_ID2:
5788                 return OC_NAME_BE;
5789         case OC_DEVICE_ID3:
5790         case OC_DEVICE_ID4:
5791                 return OC_NAME_LANCER;
5792         case BE_DEVICE_ID2:
5793                 return BE3_NAME;
5794         case OC_DEVICE_ID5:
5795         case OC_DEVICE_ID6:
5796                 return OC_NAME_SH;
5797         default:
5798                 return BE_NAME;
5799         }
5800 }
5801
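/* PCI probe: enable the device, map BARs, initialize driver state, bring
 * the adapter up via be_setup() and register the netdev. Each step unwinds
 * through the goto labels at the bottom of the function on failure.
 */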
5802 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5803 {
5804         struct be_adapter *adapter;
5805         struct net_device *netdev;
5806         int status = 0;
5807
5808         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5809
5810         status = pci_enable_device(pdev);
5811         if (status)
5812                 goto do_none;
5813
5814         status = pci_request_regions(pdev, DRV_NAME);
5815         if (status)
5816                 goto disable_dev;
5817         pci_set_master(pdev);
5818
5819         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5820         if (!netdev) {
5821                 status = -ENOMEM;
5822                 goto rel_reg;
5823         }
5824         adapter = netdev_priv(netdev);
5825         adapter->pdev = pdev;
5826         pci_set_drvdata(pdev, adapter);
5827         adapter->netdev = netdev;
5828         SET_NETDEV_DEV(netdev, &pdev->dev);
5829
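        /* Prefer 64-bit DMA and advertise NETIF_F_HIGHDMA so the stack may
         * hand us buffers above 4GB; fall back to a 32-bit mask on
         * platforms/IOMMUs that cannot satisfy the 64-bit request.
         */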
5830         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5831         if (!status) {
5832                 netdev->features |= NETIF_F_HIGHDMA;
5833         } else {
5834                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5835                 if (status) {
5836                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5837                         goto free_netdev;
5838                 }
5839         }
5840
5841         status = pci_enable_pcie_error_reporting(pdev);
5842         if (!status)
5843                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5844
5845         status = be_map_pci_bars(adapter);
5846         if (status)
5847                 goto free_netdev;
5848
5849         status = be_drv_init(adapter);
5850         if (status)
5851                 goto unmap_bars;
5852
5853         status = be_setup(adapter);
5854         if (status)
5855                 goto drv_cleanup;
5856
5857         be_netdev_init(netdev);
5858         status = register_netdev(netdev);
5859         if (status != 0)
5860                 goto unsetup;
5861
5862         be_roce_dev_add(adapter);
5863
5864         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5865         adapter->error_recovery.probe_time = jiffies;
5866
5867         /* On-die temperature is not supported on VFs. */
5868         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5869                 adapter->hwmon_info.hwmon_dev =
5870                         devm_hwmon_device_register_with_groups(&pdev->dev,
5871                                                                DRV_NAME,
5872                                                                adapter,
5873                                                                be_hwmon_groups);
5874                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5875         }
5876
5877         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5878                  func_name(adapter), mc_name(adapter), adapter->port_name);
5879
5880         return 0;
5881
5882 unsetup:
5883         be_clear(adapter);
5884 drv_cleanup:
5885         be_drv_cleanup(adapter);
5886 unmap_bars:
5887         be_unmap_pci_bars(adapter);
5888 free_netdev:
5889         free_netdev(netdev);
5890 rel_reg:
5891         pci_release_regions(pdev);
5892 disable_dev:
5893         pci_disable_device(pdev);
5894 do_none:
5895         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5896         return status;
5897 }
5898
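/* Legacy PCI PM callbacks: quiesce the adapter and let the core cut power.
 * be_pci_resume() below re-enables the device and restores the saved
 * config space before re-arming error detection.
 */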
5899 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5900 {
5901         struct be_adapter *adapter = pci_get_drvdata(pdev);
5902
5903         be_intr_set(adapter, false);
5904         be_cancel_err_detection(adapter);
5905
5906         be_cleanup(adapter);
5907
5908         pci_save_state(pdev);
5909         pci_disable_device(pdev);
5910         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5911         return 0;
5912 }
5913
5914 static int be_pci_resume(struct pci_dev *pdev)
5915 {
5916         struct be_adapter *adapter = pci_get_drvdata(pdev);
5917         int status = 0;
5918
5919         status = pci_enable_device(pdev);
5920         if (status)
5921                 return status;
5922
5923         pci_restore_state(pdev);
5924
5925         status = be_resume(adapter);
5926         if (status)
5927                 return status;
5928
5929         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5930
5931         return 0;
5932 }
5933
5934 /*
5935  * An FLR (Function Level Reset) will stop BE from DMAing any data.
5936  */
5937 static void be_shutdown(struct pci_dev *pdev)
5938 {
5939         struct be_adapter *adapter = pci_get_drvdata(pdev);
5940
5941         if (!adapter)
5942                 return;
5943
5944         be_roce_dev_shutdown(adapter);
5945         cancel_delayed_work_sync(&adapter->work);
5946         be_cancel_err_detection(adapter);
5947
5948         netif_device_detach(adapter->netdev);
5949
5950         be_cmd_reset_function(adapter);
5951
5952         pci_disable_device(pdev);
5953 }
5954
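/* EEH/AER error handlers. The PCI core drives the standard recovery
 * sequence: error_detected() -> slot_reset() -> resume(), wired up in
 * be_eeh_handlers below.
 */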
5955 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5956                                             pci_channel_state_t state)
5957 {
5958         struct be_adapter *adapter = pci_get_drvdata(pdev);
5959
5960         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5961
5962         be_roce_dev_remove(adapter);
5963
5964         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5965                 be_set_error(adapter, BE_ERROR_EEH);
5966
5967                 be_cancel_err_detection(adapter);
5968
5969                 be_cleanup(adapter);
5970         }
5971
5972         if (state == pci_channel_io_perm_failure)
5973                 return PCI_ERS_RESULT_DISCONNECT;
5974
5975         pci_disable_device(pdev);
5976
5977         /* The error could cause the FW to trigger a flash debug dump.
5978          * Resetting the card while a flash dump is in progress can
5979          * prevent recovery; wait for the dump to finish. Wait only on
5980          * the first function, as the wait is needed just once per
5981          * adapter.
5982          */
5983         if (pdev->devfn == 0)
5984                 ssleep(30);
5985
5986         return PCI_ERS_RESULT_NEED_RESET;
5987 }
5988
5989 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5990 {
5991         struct be_adapter *adapter = pci_get_drvdata(pdev);
5992         int status;
5993
5994         dev_info(&adapter->pdev->dev, "EEH reset\n");
5995
5996         status = pci_enable_device(pdev);
5997         if (status)
5998                 return PCI_ERS_RESULT_DISCONNECT;
5999
6000         pci_set_master(pdev);
6001         pci_restore_state(pdev);
6002
6003         /* Check if card is ok and fw is ready */
6004         dev_info(&adapter->pdev->dev,
6005                  "Waiting for FW to be ready after EEH reset\n");
6006         status = be_fw_wait_ready(adapter);
6007         if (status)
6008                 return PCI_ERS_RESULT_DISCONNECT;
6009
6010         pci_cleanup_aer_uncorrect_error_status(pdev);
6011         be_clear_error(adapter, BE_CLEAR_ALL);
6012         return PCI_ERS_RESULT_RECOVERED;
6013 }
6014
6015 static void be_eeh_resume(struct pci_dev *pdev)
6016 {
6017         int status = 0;
6018         struct be_adapter *adapter = pci_get_drvdata(pdev);
6019
6020         dev_info(&adapter->pdev->dev, "EEH resume\n");
6021
6022         pci_save_state(pdev);
6023
6024         status = be_resume(adapter);
6025         if (status)
6026                 goto err;
6027
6028         be_roce_dev_add(adapter);
6029
6030         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6031         return;
6032 err:
6033         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6034 }
6035
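/* Backs the standard sriov_numvfs sysfs interface. A usage sketch, where
 * the BDF below is only a placeholder for an actual adapter function:
 *
 *   # enable 4 VFs
 *   echo 4 > /sys/bus/pci/devices/0000:05:00.0/sriov_numvfs
 *   # disable all VFs
 *   echo 0 > /sys/bus/pci/devices/0000:05:00.0/sriov_numvfs
 *
 * Returns the number of VFs enabled, or a negative errno on failure.
 */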
6036 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6037 {
6038         struct be_adapter *adapter = pci_get_drvdata(pdev);
6039         struct be_resources vft_res = {0};
6040         int status;
6041
6042         if (!num_vfs)
6043                 be_vf_clear(adapter);
6044
6045         adapter->num_vfs = num_vfs;
6046
6047         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6048                 dev_warn(&pdev->dev,
6049                          "Cannot disable VFs while they are assigned\n");
6050                 return -EBUSY;
6051         }
6052
6053         /* When the HW is in an SRIOV-capable configuration, the PF-pool
6054          * resources are distributed equally across the maximum number of
6055          * VFs. The user may request that only a subset of the max VFs be
6056          * enabled; based on num_vfs, redistribute the resources across
6057          * num_vfs so that each VF gets a larger share of them.
6058          * This facility is not available in BE3 FW.
6059          * Also, this is done by the FW in the Lancer chip.
6060          */
6061         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6062                 be_calculate_vf_res(adapter, adapter->num_vfs,
6063                                     &vft_res);
6064                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6065                                                  adapter->num_vfs, &vft_res);
6066                 if (status)
6067                         dev_err(&pdev->dev,
6068                                 "Failed to optimize SR-IOV resources\n");
6069         }
6070
6071         status = be_get_resources(adapter);
6072         if (status)
6073                 return be_cmd_status(status);
6074
6075         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6076         rtnl_lock();
6077         status = be_update_queues(adapter);
6078         rtnl_unlock();
6079         if (status)
6080                 return be_cmd_status(status);
6081
6082         if (adapter->num_vfs)
6083                 status = be_vf_setup(adapter);
6084
6085         if (!status)
6086                 return adapter->num_vfs;
6087
6088         return 0;
6089 }
6090
6091 static const struct pci_error_handlers be_eeh_handlers = {
6092         .error_detected = be_eeh_err_detected,
6093         .slot_reset = be_eeh_reset,
6094         .resume = be_eeh_resume,
6095 };
6096
6097 static struct pci_driver be_driver = {
6098         .name = DRV_NAME,
6099         .id_table = be_dev_ids,
6100         .probe = be_probe,
6101         .remove = be_remove,
6102         .suspend = be_suspend,
6103         .resume = be_pci_resume,
6104         .shutdown = be_shutdown,
6105         .sriov_configure = be_pci_sriov_configure,
6106         .err_handler = &be_eeh_handlers
6107 };
6108
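/* Module init. An illustrative load with a larger RX fragment size:
 *
 *   modprobe be2net rx_frag_size=4096
 *
 * Invalid sizes are coerced back to the 2048 default below.
 */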
6109 static int __init be_init_module(void)
6110 {
6111         int status;
6112
6113         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6114             rx_frag_size != 2048) {
6115                 pr_warn(DRV_NAME
6116                         " : Module param rx_frag_size must be 2048/4096/8192."
6117                         " Using 2048\n");
6118                 rx_frag_size = 2048;
6119         }
6120
6121         if (num_vfs > 0) {
6122                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6123                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6124         }
6125
6126         be_wq = create_singlethread_workqueue("be_wq");
6127         if (!be_wq) {
6128                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6129                 return -ENOMEM;
6130         }
6131
6132         be_err_recovery_workq =
6133                 create_singlethread_workqueue("be_err_recover");
6134         if (!be_err_recovery_workq)
6135                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6136
6137         status = pci_register_driver(&be_driver);
6138         if (status) {
6139                 destroy_workqueue(be_wq);
6140                 be_destroy_err_recovery_workq();
6141         }
6142         return status;
6143 }
6144 module_init(be_init_module);
6145
6146 static void __exit be_exit_module(void)
6147 {
6148         pci_unregister_driver(&be_driver);
6149
6150         be_destroy_err_recovery_workq();
6151
6152         if (be_wq)
6153                 destroy_workqueue(be_wq);
6154 }
6155 module_exit(be_exit_module);