be2net: Fix TX stats for TSO packets
[cascardo/linux.git] / drivers / net / ethernet / emulex / benet / be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
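
/* Sizing example (illustrative numbers, not this driver's defaults): a
 * 2048-entry ring with 16-byte entries results in a single 32KB
 * dma_zalloc_coherent() allocation, since mem->size = len * entry_size.
 */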
168
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
279                                      mac)) {
280                         /* mac already added, skip addition */
281                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
282                         return 0;
283                 }
284         }
285
286         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
287                                &adapter->pmac_id[0], 0);
288 }
289
290 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
291 {
292         int i;
293
294         /* Skip deletion if the programmed mac is
295          * being used in uc-list
296          */
297         for (i = 0; i < adapter->uc_macs; i++) {
298                 if (adapter->pmac_id[i + 1] == pmac_id)
299                         return;
300         }
301         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
302 }
303
304 static int be_mac_addr_set(struct net_device *netdev, void *p)
305 {
306         struct be_adapter *adapter = netdev_priv(netdev);
307         struct device *dev = &adapter->pdev->dev;
308         struct sockaddr *addr = p;
309         int status;
310         u8 mac[ETH_ALEN];
311         u32 old_pmac_id = adapter->pmac_id[0];
312
313         if (!is_valid_ether_addr(addr->sa_data))
314                 return -EADDRNOTAVAIL;
315
316         /* Proceed further only if the user-provided MAC is different
317          * from the active MAC
318          */
319         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
320                 return 0;
321
322         /* if device is not running, copy MAC to netdev->dev_addr */
323         if (!netif_running(netdev))
324                 goto done;
325
326         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
327          * privilege or if PF did not provision the new MAC address.
328          * On BE3, this cmd will always fail if the VF doesn't have the
329          * FILTMGMT privilege. This failure is OK only if the PF programmed
330          * the MAC for the VF.
331          */
332         mutex_lock(&adapter->rx_filter_lock);
333         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
334         if (!status) {
336                 /* Delete the old programmed MAC. This call may fail if the
337                  * old MAC was already deleted by the PF driver.
338                  */
339                 if (adapter->pmac_id[0] != old_pmac_id)
340                         be_dev_mac_del(adapter, old_pmac_id);
341         }
342
343         mutex_unlock(&adapter->rx_filter_lock);
344         /* Decide if the new MAC is successfully activated only after
345          * querying the FW
346          */
347         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
348                                        adapter->if_handle, true, 0);
349         if (status)
350                 goto err;
351
352         /* The MAC change did not happen, either due to lack of privilege
353          * or because the PF didn't pre-provision the MAC.
354          */
355         if (!ether_addr_equal(addr->sa_data, mac)) {
356                 status = -EPERM;
357                 goto err;
358         }
359 done:
360         ether_addr_copy(adapter->dev_mac, addr->sa_data);
361         ether_addr_copy(netdev->dev_addr, addr->sa_data);
362         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
363         return 0;
364 err:
365         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
366         return status;
367 }
368
369 /* BE2 supports only v0 cmd */
370 static void *hw_stats_from_cmd(struct be_adapter *adapter)
371 {
372         if (BE2_chip(adapter)) {
373                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
374
375                 return &cmd->hw_stats;
376         } else if (BE3_chip(adapter)) {
377                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
378
379                 return &cmd->hw_stats;
380         } else {
381                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         }
385 }
386
387 /* BE2 supports only v0 cmd */
388 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
389 {
390         if (BE2_chip(adapter)) {
391                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
392
393                 return &hw_stats->erx;
394         } else if (BE3_chip(adapter)) {
395                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
396
397                 return &hw_stats->erx;
398         } else {
399                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         }
403 }
404
405 static void populate_be_v0_stats(struct be_adapter *adapter)
406 {
407         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
409         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
410         struct be_port_rxf_stats_v0 *port_stats =
411                                         &rxf_stats->port[adapter->port_num];
412         struct be_drv_stats *drvs = &adapter->drv_stats;
413
414         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
415         drvs->rx_pause_frames = port_stats->rx_pause_frames;
416         drvs->rx_crc_errors = port_stats->rx_crc_errors;
417         drvs->rx_control_frames = port_stats->rx_control_frames;
418         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
419         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
420         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
421         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
422         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
423         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
424         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
425         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
426         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
427         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
428         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
429         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
430         drvs->rx_dropped_header_too_small =
431                 port_stats->rx_dropped_header_too_small;
432         drvs->rx_address_filtered =
433                                         port_stats->rx_address_filtered +
434                                         port_stats->rx_vlan_filtered;
435         drvs->rx_alignment_symbol_errors =
436                 port_stats->rx_alignment_symbol_errors;
437
438         drvs->tx_pauseframes = port_stats->tx_pauseframes;
439         drvs->tx_controlframes = port_stats->tx_controlframes;
440
441         if (adapter->port_num)
442                 drvs->jabber_events = rxf_stats->port1_jabber_events;
443         else
444                 drvs->jabber_events = rxf_stats->port0_jabber_events;
445         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447         drvs->forwarded_packets = rxf_stats->forwarded_packets;
448         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
452 }
453
454 static void populate_be_v1_stats(struct be_adapter *adapter)
455 {
456         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
457         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
459         struct be_port_rxf_stats_v1 *port_stats =
460                                         &rxf_stats->port[adapter->port_num];
461         struct be_drv_stats *drvs = &adapter->drv_stats;
462
463         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466         drvs->rx_pause_frames = port_stats->rx_pause_frames;
467         drvs->rx_crc_errors = port_stats->rx_crc_errors;
468         drvs->rx_control_frames = port_stats->rx_control_frames;
469         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479         drvs->rx_dropped_header_too_small =
480                 port_stats->rx_dropped_header_too_small;
481         drvs->rx_input_fifo_overflow_drop =
482                 port_stats->rx_input_fifo_overflow_drop;
483         drvs->rx_address_filtered = port_stats->rx_address_filtered;
484         drvs->rx_alignment_symbol_errors =
485                 port_stats->rx_alignment_symbol_errors;
486         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487         drvs->tx_pauseframes = port_stats->tx_pauseframes;
488         drvs->tx_controlframes = port_stats->tx_controlframes;
489         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490         drvs->jabber_events = port_stats->jabber_events;
491         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493         drvs->forwarded_packets = rxf_stats->forwarded_packets;
494         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
498 }
499
500 static void populate_be_v2_stats(struct be_adapter *adapter)
501 {
502         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
503         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
504         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
505         struct be_port_rxf_stats_v2 *port_stats =
506                                         &rxf_stats->port[adapter->port_num];
507         struct be_drv_stats *drvs = &adapter->drv_stats;
508
509         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
510         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
511         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
512         drvs->rx_pause_frames = port_stats->rx_pause_frames;
513         drvs->rx_crc_errors = port_stats->rx_crc_errors;
514         drvs->rx_control_frames = port_stats->rx_control_frames;
515         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
516         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
517         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
518         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
519         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
520         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
521         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
522         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
523         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
524         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
525         drvs->rx_dropped_header_too_small =
526                 port_stats->rx_dropped_header_too_small;
527         drvs->rx_input_fifo_overflow_drop =
528                 port_stats->rx_input_fifo_overflow_drop;
529         drvs->rx_address_filtered = port_stats->rx_address_filtered;
530         drvs->rx_alignment_symbol_errors =
531                 port_stats->rx_alignment_symbol_errors;
532         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
533         drvs->tx_pauseframes = port_stats->tx_pauseframes;
534         drvs->tx_controlframes = port_stats->tx_controlframes;
535         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
536         drvs->jabber_events = port_stats->jabber_events;
537         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
538         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
539         drvs->forwarded_packets = rxf_stats->forwarded_packets;
540         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
541         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
542         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
543         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
544         if (be_roce_supported(adapter)) {
545                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
546                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
547                 drvs->rx_roce_frames = port_stats->roce_frames_received;
548                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
549                 drvs->roce_drops_payload_len =
550                         port_stats->roce_drops_payload_len;
551         }
552 }
553
554 static void populate_lancer_stats(struct be_adapter *adapter)
555 {
556         struct be_drv_stats *drvs = &adapter->drv_stats;
557         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
558
559         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
560         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
561         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
562         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
563         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
564         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
565         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
566         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
567         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
568         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
569         drvs->rx_dropped_tcp_length =
570                                 pport_stats->rx_dropped_invalid_tcp_length;
571         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
572         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
573         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
574         drvs->rx_dropped_header_too_small =
575                                 pport_stats->rx_dropped_header_too_small;
576         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
577         drvs->rx_address_filtered =
578                                         pport_stats->rx_address_filtered +
579                                         pport_stats->rx_vlan_filtered;
580         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
581         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
582         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
583         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
584         drvs->jabber_events = pport_stats->rx_jabbers;
585         drvs->forwarded_packets = pport_stats->num_forwards_lo;
586         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
587         drvs->rx_drops_too_many_frags =
588                                 pport_stats->rx_drops_too_many_frags_lo;
589 }
590
591 static void accumulate_16bit_val(u32 *acc, u16 val)
592 {
593 #define lo(x)                   (x & 0xFFFF)
594 #define hi(x)                   (x & 0xFFFF0000)
595         bool wrapped = val < lo(*acc);
596         u32 newacc = hi(*acc) + val;
597
598         if (wrapped)
599                 newacc += 65536;
600         ACCESS_ONCE(*acc) = newacc;
601 }
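
/* Worked example (illustrative values): if *acc is 0x0001FFF0 (low 16 bits =
 * 65520) and the new HW reading is 16, the 16-bit counter has wrapped, so the
 * accumulated value becomes 0x00010000 + 16 + 65536 = 0x00020010.
 */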
602
603 static void populate_erx_stats(struct be_adapter *adapter,
604                                struct be_rx_obj *rxo, u32 erx_stat)
605 {
606         if (!BEx_chip(adapter))
607                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
608         else
609                 /* the erx HW counter below can actually wrap around after
610                  * 65535; the driver accumulates it into a 32-bit value
611                  */
612                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
613                                      (u16)erx_stat);
614 }
615
616 void be_parse_stats(struct be_adapter *adapter)
617 {
618         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
619         struct be_rx_obj *rxo;
620         int i;
621         u32 erx_stat;
622
623         if (lancer_chip(adapter)) {
624                 populate_lancer_stats(adapter);
625         } else {
626                 if (BE2_chip(adapter))
627                         populate_be_v0_stats(adapter);
628                 else if (BE3_chip(adapter))
629                         /* for BE3 */
630                         populate_be_v1_stats(adapter);
631                 else
632                         populate_be_v2_stats(adapter);
633
634                 /* erx_v2 is longer than v0/v1; use the v2 layout for v0/v1 access too */
635                 for_all_rx_queues(adapter, rxo, i) {
636                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
637                         populate_erx_stats(adapter, rxo, erx_stat);
638                 }
639         }
640 }
641
642 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
643                                                 struct rtnl_link_stats64 *stats)
644 {
645         struct be_adapter *adapter = netdev_priv(netdev);
646         struct be_drv_stats *drvs = &adapter->drv_stats;
647         struct be_rx_obj *rxo;
648         struct be_tx_obj *txo;
649         u64 pkts, bytes;
650         unsigned int start;
651         int i;
652
653         for_all_rx_queues(adapter, rxo, i) {
654                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
655
656                 do {
657                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
658                         pkts = rx_stats(rxo)->rx_pkts;
659                         bytes = rx_stats(rxo)->rx_bytes;
660                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
661                 stats->rx_packets += pkts;
662                 stats->rx_bytes += bytes;
663                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
664                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
665                                         rx_stats(rxo)->rx_drops_no_frags;
666         }
667
668         for_all_tx_queues(adapter, txo, i) {
669                 const struct be_tx_stats *tx_stats = tx_stats(txo);
670
671                 do {
672                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
673                         pkts = tx_stats(txo)->tx_pkts;
674                         bytes = tx_stats(txo)->tx_bytes;
675                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
676                 stats->tx_packets += pkts;
677                 stats->tx_bytes += bytes;
678         }
679
680         /* bad pkts received */
681         stats->rx_errors = drvs->rx_crc_errors +
682                 drvs->rx_alignment_symbol_errors +
683                 drvs->rx_in_range_errors +
684                 drvs->rx_out_range_errors +
685                 drvs->rx_frame_too_long +
686                 drvs->rx_dropped_too_small +
687                 drvs->rx_dropped_too_short +
688                 drvs->rx_dropped_header_too_small +
689                 drvs->rx_dropped_tcp_length +
690                 drvs->rx_dropped_runt;
691
692         /* detailed rx errors */
693         stats->rx_length_errors = drvs->rx_in_range_errors +
694                 drvs->rx_out_range_errors +
695                 drvs->rx_frame_too_long;
696
697         stats->rx_crc_errors = drvs->rx_crc_errors;
698
699         /* frame alignment errors */
700         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
701
702         /* receiver fifo overrun */
703         /* drops_no_pbuf is not per i/f, it's per BE card */
704         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
705                                 drvs->rx_input_fifo_overflow_drop +
706                                 drvs->rx_drops_no_pbuf;
707         return stats;
708 }
709
710 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
711 {
712         struct net_device *netdev = adapter->netdev;
713
714         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
715                 netif_carrier_off(netdev);
716                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
717         }
718
719         if (link_status)
720                 netif_carrier_on(netdev);
721         else
722                 netif_carrier_off(netdev);
723
724         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
725 }
726
727 static int be_gso_hdr_len(struct sk_buff *skb)
728 {
729         if (skb->encapsulation)
730                 return skb_inner_transport_offset(skb) +
731                        inner_tcp_hdrlen(skb);
732         return skb_transport_offset(skb) + tcp_hdrlen(skb);
733 }
734
735 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
736 {
737         struct be_tx_stats *stats = tx_stats(txo);
738         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
739         /* Account for headers which get duplicated in TSO pkt */
740         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
741
742         u64_stats_update_begin(&stats->sync);
743         stats->tx_reqs++;
744         stats->tx_bytes += skb->len + dup_hdr_len;
745         stats->tx_pkts += tx_pkts;
746         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
747                 stats->tx_vxlan_offload_pkts += tx_pkts;
748         u64_stats_update_end(&stats->sync);
749 }
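
/* Worked example of the TSO accounting above (illustrative sizes, assuming
 * plain IPv4/TCP with no options): a TSO skb with 64000 bytes of payload and
 * gso_size = 1448 is reported by the stack as gso_segs = 45. be_gso_hdr_len()
 * returns 14 + 20 + 20 = 54, so dup_hdr_len = 54 * 44 = 2376 and tx_bytes is
 * incremented by skb->len + 2376, i.e. by the bytes that actually hit the
 * wire once the headers are replicated for every segment.
 */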
750
751 /* Returns number of WRBs needed for the skb */
752 static u32 skb_wrb_cnt(struct sk_buff *skb)
753 {
754         /* +1 for the header wrb */
755         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
756 }
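
/* e.g. an skb with linear data and 3 page frags needs 1 (hdr) + 1 (linear) +
 * 3 (frags) = 5 WRBs.
 */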
757
758 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
759 {
760         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
761         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
762         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
763         wrb->rsvd0 = 0;
764 }
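
/* e.g. a DMA address of 0x0000001234567890 (illustrative) is split into
 * frag_pa_hi = 0x00000012 and frag_pa_lo = 0x34567890, each converted to
 * little-endian before being written into the WRB.
 */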
765
766 /* A dummy wrb is just all zeros. A separate routine is used for the dummy
767  * wrb to avoid the swap and shift/mask operations in wrb_fill().
768  */
769 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
770 {
771         wrb->frag_pa_hi = 0;
772         wrb->frag_pa_lo = 0;
773         wrb->frag_len = 0;
774         wrb->rsvd0 = 0;
775 }
776
777 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
778                                      struct sk_buff *skb)
779 {
780         u8 vlan_prio;
781         u16 vlan_tag;
782
783         vlan_tag = skb_vlan_tag_get(skb);
784         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
785         /* If vlan priority provided by OS is NOT in available bmap */
786         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
787                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
788                                 adapter->recommended_prio_bits;
789
790         return vlan_tag;
791 }
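
/* Illustrative example: for a tag of 0x6064 (PCP 3, VID 100), if bit 3 of
 * vlan_prio_bmap is clear, the PCP bits are overwritten with
 * adapter->recommended_prio_bits while the VID bits are preserved.
 */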
792
793 /* Used only for IP tunnel packets */
794 static u16 skb_inner_ip_proto(struct sk_buff *skb)
795 {
796         return (inner_ip_hdr(skb)->version == 4) ?
797                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
798 }
799
800 static u16 skb_ip_proto(struct sk_buff *skb)
801 {
802         return (ip_hdr(skb)->version == 4) ?
803                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
804 }
805
806 static inline bool be_is_txq_full(struct be_tx_obj *txo)
807 {
808         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
809 }
810
811 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
812 {
813         return atomic_read(&txo->q.used) < txo->q.len / 2;
814 }
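
/* Flow-control hysteresis: a TX queue is stopped once fewer than
 * BE_MAX_TX_FRAG_COUNT entries are free and is only rewoken after it drains
 * to below half full, so it does not flip-flop on every completion.
 */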
815
816 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
817 {
818         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
819 }
820
821 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
822                                        struct sk_buff *skb,
823                                        struct be_wrb_params *wrb_params)
824 {
825         u16 proto;
826
827         if (skb_is_gso(skb)) {
828                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
829                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
830                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
831                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
832         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
833                 if (skb->encapsulation) {
834                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
835                         proto = skb_inner_ip_proto(skb);
836                 } else {
837                         proto = skb_ip_proto(skb);
838                 }
839                 if (proto == IPPROTO_TCP)
840                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
841                 else if (proto == IPPROTO_UDP)
842                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
843         }
844
845         if (skb_vlan_tag_present(skb)) {
846                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
847                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
848         }
849
850         BE_WRB_F_SET(wrb_params->features, CRC, 1);
851 }
852
853 static void wrb_fill_hdr(struct be_adapter *adapter,
854                          struct be_eth_hdr_wrb *hdr,
855                          struct be_wrb_params *wrb_params,
856                          struct sk_buff *skb)
857 {
858         memset(hdr, 0, sizeof(*hdr));
859
860         SET_TX_WRB_HDR_BITS(crc, hdr,
861                             BE_WRB_F_GET(wrb_params->features, CRC));
862         SET_TX_WRB_HDR_BITS(ipcs, hdr,
863                             BE_WRB_F_GET(wrb_params->features, IPCS));
864         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
865                             BE_WRB_F_GET(wrb_params->features, TCPCS));
866         SET_TX_WRB_HDR_BITS(udpcs, hdr,
867                             BE_WRB_F_GET(wrb_params->features, UDPCS));
868
869         SET_TX_WRB_HDR_BITS(lso, hdr,
870                             BE_WRB_F_GET(wrb_params->features, LSO));
871         SET_TX_WRB_HDR_BITS(lso6, hdr,
872                             BE_WRB_F_GET(wrb_params->features, LSO6));
873         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
874
875         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
876          * hack is not needed, the evt bit is set while ringing DB.
877          */
878         SET_TX_WRB_HDR_BITS(event, hdr,
879                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
880         SET_TX_WRB_HDR_BITS(vlan, hdr,
881                             BE_WRB_F_GET(wrb_params->features, VLAN));
882         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
883
884         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
885         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
886         SET_TX_WRB_HDR_BITS(mgmt, hdr,
887                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
888 }
889
890 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
891                           bool unmap_single)
892 {
893         dma_addr_t dma;
894         u32 frag_len = le32_to_cpu(wrb->frag_len);
895
897         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
898                 (u64)le32_to_cpu(wrb->frag_pa_lo);
899         if (frag_len) {
900                 if (unmap_single)
901                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
902                 else
903                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
904         }
905 }
906
907 /* Grab a WRB header for xmit */
908 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
909 {
910         u32 head = txo->q.head;
911
912         queue_head_inc(&txo->q);
913         return head;
914 }
915
916 /* Set up the WRB header for xmit */
917 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
918                                 struct be_tx_obj *txo,
919                                 struct be_wrb_params *wrb_params,
920                                 struct sk_buff *skb, u16 head)
921 {
922         u32 num_frags = skb_wrb_cnt(skb);
923         struct be_queue_info *txq = &txo->q;
924         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
925
926         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
927         be_dws_cpu_to_le(hdr, sizeof(*hdr));
928
929         BUG_ON(txo->sent_skb_list[head]);
930         txo->sent_skb_list[head] = skb;
931         txo->last_req_hdr = head;
932         atomic_add(num_frags, &txq->used);
933         txo->last_req_wrb_cnt = num_frags;
934         txo->pend_wrb_cnt += num_frags;
935 }
936
937 /* Setup a WRB fragment (buffer descriptor) for xmit */
938 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
939                                  int len)
940 {
941         struct be_eth_wrb *wrb;
942         struct be_queue_info *txq = &txo->q;
943
944         wrb = queue_head_node(txq);
945         wrb_fill(wrb, busaddr, len);
946         queue_head_inc(txq);
947 }
948
949 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
950  * was invoked. The producer index is restored to the previous packet and the
951  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
952  */
953 static void be_xmit_restore(struct be_adapter *adapter,
954                             struct be_tx_obj *txo, u32 head, bool map_single,
955                             u32 copied)
956 {
957         struct device *dev;
958         struct be_eth_wrb *wrb;
959         struct be_queue_info *txq = &txo->q;
960
961         dev = &adapter->pdev->dev;
962         txq->head = head;
963
964         /* skip the first wrb (hdr); it's not mapped */
965         queue_head_inc(txq);
966         while (copied) {
967                 wrb = queue_head_node(txq);
968                 unmap_tx_frag(dev, wrb, map_single);
969                 map_single = false;
970                 copied -= le32_to_cpu(wrb->frag_len);
971                 queue_head_inc(txq);
972         }
973
974         txq->head = head;
975 }
976
977 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
978  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
979  * of WRBs used up by the packet.
980  */
981 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
982                            struct sk_buff *skb,
983                            struct be_wrb_params *wrb_params)
984 {
985         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
986         struct device *dev = &adapter->pdev->dev;
987         struct be_queue_info *txq = &txo->q;
988         bool map_single = false;
989         u32 head = txq->head;
990         dma_addr_t busaddr;
991         int len;
992
993         head = be_tx_get_wrb_hdr(txo);
994
995         if (skb->len > skb->data_len) {
996                 len = skb_headlen(skb);
997
998                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
999                 if (dma_mapping_error(dev, busaddr))
1000                         goto dma_err;
1001                 map_single = true;
1002                 be_tx_setup_wrb_frag(txo, busaddr, len);
1003                 copied += len;
1004         }
1005
1006         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1007                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1008                 len = skb_frag_size(frag);
1009
1010                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1011                 if (dma_mapping_error(dev, busaddr))
1012                         goto dma_err;
1013                 be_tx_setup_wrb_frag(txo, busaddr, len);
1014                 copied += len;
1015         }
1016
1017         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1018
1019         be_tx_stats_update(txo, skb);
1020         return wrb_cnt;
1021
1022 dma_err:
1023         adapter->drv_stats.dma_map_errors++;
1024         be_xmit_restore(adapter, txo, head, map_single, copied);
1025         return 0;
1026 }
1027
1028 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1029 {
1030         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1031 }
1032
1033 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1034                                              struct sk_buff *skb,
1035                                              struct be_wrb_params
1036                                              *wrb_params)
1037 {
1038         u16 vlan_tag = 0;
1039
1040         skb = skb_share_check(skb, GFP_ATOMIC);
1041         if (unlikely(!skb))
1042                 return skb;
1043
1044         if (skb_vlan_tag_present(skb))
1045                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1046
1047         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1048                 if (!vlan_tag)
1049                         vlan_tag = adapter->pvid;
1050                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1051                  * skip VLAN insertion
1052                  */
1053                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1054         }
1055
1056         if (vlan_tag) {
1057                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1058                                                 vlan_tag);
1059                 if (unlikely(!skb))
1060                         return skb;
1061                 skb->vlan_tci = 0;
1062         }
1063
1064         /* Insert the outer VLAN, if any */
1065         if (adapter->qnq_vid) {
1066                 vlan_tag = adapter->qnq_vid;
1067                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1068                                                 vlan_tag);
1069                 if (unlikely(!skb))
1070                         return skb;
1071                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1072         }
1073
1074         return skb;
1075 }
1076
1077 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1078 {
1079         struct ethhdr *eh = (struct ethhdr *)skb->data;
1080         u16 offset = ETH_HLEN;
1081
1082         if (eh->h_proto == htons(ETH_P_IPV6)) {
1083                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1084
1085                 offset += sizeof(struct ipv6hdr);
1086                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1087                     ip6h->nexthdr != NEXTHDR_UDP) {
1088                         struct ipv6_opt_hdr *ehdr =
1089                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1090
1091                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1092                         if (ehdr->hdrlen == 0xff)
1093                                 return true;
1094                 }
1095         }
1096         return false;
1097 }
1098
1099 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1100 {
1101         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1102 }
1103
1104 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1105 {
1106         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1107 }
1108
1109 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1110                                                   struct sk_buff *skb,
1111                                                   struct be_wrb_params
1112                                                   *wrb_params)
1113 {
1114         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1115         unsigned int eth_hdr_len;
1116         struct iphdr *ip;
1117
1118         /* For padded packets, BE HW modifies the tot_len field in the IP
1119          * header incorrectly when the VLAN tag is inserted by HW.
1120          * For padded packets, Lancer computes an incorrect checksum.
1121          */
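        /* e.g. a bare TCP ACK is a 54-byte frame (14B Ethernet + 20B IPv4 +
         * 20B TCP); if it arrives here padded, pskb_trim() below shrinks it
         * back to eth_hdr_len + ntohs(ip->tot_len) = 54 bytes so the HW never
         * operates on the pad bytes (illustrative scenario).
         */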
1122         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1123                                                 VLAN_ETH_HLEN : ETH_HLEN;
1124         if (skb->len <= 60 &&
1125             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1126             is_ipv4_pkt(skb)) {
1127                 ip = (struct iphdr *)ip_hdr(skb);
1128                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1129         }
1130
1131         /* If vlan tag is already inlined in the packet, skip HW VLAN
1132          * tagging in pvid-tagging mode
1133          */
1134         if (be_pvid_tagging_enabled(adapter) &&
1135             veh->h_vlan_proto == htons(ETH_P_8021Q))
1136                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1137
1138         /* HW has a bug wherein it will calculate CSUM for VLAN
1139          * pkts even though CSUM offload is disabled.
1140          * Manually insert the VLAN tag in the pkt to work around this.
1141          */
1142         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1143             skb_vlan_tag_present(skb)) {
1144                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1145                 if (unlikely(!skb))
1146                         goto err;
1147         }
1148
1149         /* HW may lock up when VLAN HW tagging is requested on
1150          * certain ipv6 packets. Drop such pkts if the HW workaround to
1151          * skip HW tagging is not enabled by FW.
1152          */
1153         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1154                      (adapter->pvid || adapter->qnq_vid) &&
1155                      !qnq_async_evt_rcvd(adapter)))
1156                 goto tx_drop;
1157
1158         /* Manual VLAN tag insertion to prevent an ASIC lockup that
1159          * can occur when the ASIC inserts a VLAN tag into
1160          * certain ipv6 packets. Insert VLAN tags in the driver,
1161          * and set the event, completion and vlan bits accordingly
1162          * in the Tx WRB.
1163          */
1164         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1165             be_vlan_tag_tx_chk(adapter, skb)) {
1166                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1167                 if (unlikely(!skb))
1168                         goto err;
1169         }
1170
1171         return skb;
1172 tx_drop:
1173         dev_kfree_skb_any(skb);
1174 err:
1175         return NULL;
1176 }
1177
1178 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1179                                            struct sk_buff *skb,
1180                                            struct be_wrb_params *wrb_params)
1181 {
1182         int err;
1183
1184         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1185          * packets that are 32 bytes or less may cause a transmit stall
1186          * on that port. The workaround is to pad such packets
1187          * (len <= 32 bytes) to a minimum length of 36 bytes.
1188          */
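        /* Note: skb_put_padto() frees the skb on failure, so the NULL return
         * below needs no separate kfree; the caller just accounts a drop.
         */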
1189         if (skb->len <= 32) {
1190                 if (skb_put_padto(skb, 36))
1191                         return NULL;
1192         }
1193
1194         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1195                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1196                 if (!skb)
1197                         return NULL;
1198         }
1199
1200         /* The stack can send us skbs with length greater than
1201          * what the HW can handle. Trim the extra bytes.
1202          */
1203         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1204         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1205         WARN_ON(err);
1206
1207         return skb;
1208 }
1209
1210 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1211 {
1212         struct be_queue_info *txq = &txo->q;
1213         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1214
1215         /* Mark the last request eventable if it hasn't been marked already */
1216         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1217                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1218
1219         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1220         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1221                 wrb_fill_dummy(queue_head_node(txq));
1222                 queue_head_inc(txq);
1223                 atomic_inc(&txq->used);
1224                 txo->pend_wrb_cnt++;
1225                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1226                                            TX_HDR_WRB_NUM_SHIFT);
1227                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1228                                           TX_HDR_WRB_NUM_SHIFT);
1229         }
1230         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1231         txo->pend_wrb_cnt = 0;
1232 }
1233
1234 /* OS2BMC related */
1235
1236 #define DHCP_CLIENT_PORT        68
1237 #define DHCP_SERVER_PORT        67
1238 #define NET_BIOS_PORT1          137
1239 #define NET_BIOS_PORT2          138
1240 #define DHCPV6_RAS_PORT         547
1241
1242 #define is_mc_allowed_on_bmc(adapter, eh)       \
1243         (!is_multicast_filt_enabled(adapter) && \
1244          is_multicast_ether_addr(eh->h_dest) && \
1245          !is_broadcast_ether_addr(eh->h_dest))
1246
1247 #define is_bc_allowed_on_bmc(adapter, eh)       \
1248         (!is_broadcast_filt_enabled(adapter) && \
1249          is_broadcast_ether_addr(eh->h_dest))
1250
1251 #define is_arp_allowed_on_bmc(adapter, skb)     \
1252         (is_arp(skb) && is_arp_filt_enabled(adapter))
1253
1254 #define is_broadcast_packet(eh, adapter)        \
1255                 (is_multicast_ether_addr(eh->h_dest) && \
1256                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1257
1258 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1259
1260 #define is_arp_filt_enabled(adapter)    \
1261                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1262
1263 #define is_dhcp_client_filt_enabled(adapter)    \
1264                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1265
1266 #define is_dhcp_srvr_filt_enabled(adapter)      \
1267                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1268
1269 #define is_nbios_filt_enabled(adapter)  \
1270                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1271
1272 #define is_ipv6_na_filt_enabled(adapter)        \
1273                 (adapter->bmc_filt_mask &       \
1274                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1275
1276 #define is_ipv6_ra_filt_enabled(adapter)        \
1277                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1278
1279 #define is_ipv6_ras_filt_enabled(adapter)       \
1280                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1281
1282 #define is_broadcast_filt_enabled(adapter)      \
1283                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1284
1285 #define is_multicast_filt_enabled(adapter)      \
1286                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1287
1288 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1289                                struct sk_buff **skb)
1290 {
1291         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1292         bool os2bmc = false;
1293
1294         if (!be_is_os2bmc_enabled(adapter))
1295                 goto done;
1296
1297         if (!is_multicast_ether_addr(eh->h_dest))
1298                 goto done;
1299
1300         if (is_mc_allowed_on_bmc(adapter, eh) ||
1301             is_bc_allowed_on_bmc(adapter, eh) ||
1302             is_arp_allowed_on_bmc(adapter, (*skb))) {
1303                 os2bmc = true;
1304                 goto done;
1305         }
1306
1307         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1308                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1309                 u8 nexthdr = hdr->nexthdr;
1310
1311                 if (nexthdr == IPPROTO_ICMPV6) {
1312                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1313
1314                         switch (icmp6->icmp6_type) {
1315                         case NDISC_ROUTER_ADVERTISEMENT:
1316                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1317                                 goto done;
1318                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1319                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1320                                 goto done;
1321                         default:
1322                                 break;
1323                         }
1324                 }
1325         }
1326
1327         if (is_udp_pkt((*skb))) {
1328                 struct udphdr *udp = udp_hdr((*skb));
1329
1330                 switch (ntohs(udp->dest)) {
1331                 case DHCP_CLIENT_PORT:
1332                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1333                         goto done;
1334                 case DHCP_SERVER_PORT:
1335                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1336                         goto done;
1337                 case NET_BIOS_PORT1:
1338                 case NET_BIOS_PORT2:
1339                         os2bmc = is_nbios_filt_enabled(adapter);
1340                         goto done;
1341                 case DHCPV6_RAS_PORT:
1342                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1343                         goto done;
1344                 default:
1345                         break;
1346                 }
1347         }
1348 done:
1349         /* For packets destined to the BMC over a vlan, the asic
1350          * expects the vlan tag to be inline in the packet.
1351          */
1352         if (os2bmc)
1353                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1354
1355         return os2bmc;
1356 }
1357
1358 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1359 {
1360         struct be_adapter *adapter = netdev_priv(netdev);
1361         u16 q_idx = skb_get_queue_mapping(skb);
1362         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1363         struct be_wrb_params wrb_params = { 0 };
1364         bool flush = !skb->xmit_more;
1365         u16 wrb_cnt;
1366
1367         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1368         if (unlikely(!skb))
1369                 goto drop;
1370
1371         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1372
1373         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1374         if (unlikely(!wrb_cnt)) {
1375                 dev_kfree_skb_any(skb);
1376                 goto drop;
1377         }
1378
1379         /* If OS2BMC is enabled and the packet is destined to the BMC,
1380          * enqueue the packet a second time with the mgmt bit set.
1381          */
1382         if (be_send_pkt_to_bmc(adapter, &skb)) {
1383                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1384                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1385                 if (unlikely(!wrb_cnt))
1386                         goto drop;
1387                 else
1388                         skb_get(skb);
1389         }
1390
1391         if (be_is_txq_full(txo)) {
1392                 netif_stop_subqueue(netdev, q_idx);
1393                 tx_stats(txo)->tx_stops++;
1394         }
1395
1396         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1397                 be_xmit_flush(adapter, txo);
1398
1399         return NETDEV_TX_OK;
1400 drop:
1401         tx_stats(txo)->tx_drv_drops++;
1402         /* Flush the already enqueued tx requests */
1403         if (flush && txo->pend_wrb_cnt)
1404                 be_xmit_flush(adapter, txo);
1405
1406         return NETDEV_TX_OK;
1407 }
1408
1409 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1410 {
1411         struct be_adapter *adapter = netdev_priv(netdev);
1412         struct device *dev = &adapter->pdev->dev;
1413
1414         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1415                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1416                          BE_MIN_MTU, BE_MAX_MTU);
1417                 return -EINVAL;
1418         }
1419
1420         dev_info(dev, "MTU changed from %d to %d bytes\n",
1421                  netdev->mtu, new_mtu);
1422         netdev->mtu = new_mtu;
1423         return 0;
1424 }
1425
1426 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1427 {
1428         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1429                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1430 }
1431
1432 static int be_set_vlan_promisc(struct be_adapter *adapter)
1433 {
1434         struct device *dev = &adapter->pdev->dev;
1435         int status;
1436
1437         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1438                 return 0;
1439
1440         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1441         if (!status) {
1442                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1443                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1444         } else {
1445                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1446         }
1447         return status;
1448 }
1449
1450 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1451 {
1452         struct device *dev = &adapter->pdev->dev;
1453         int status;
1454
1455         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1456         if (!status) {
1457                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1458                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1459         }
1460         return status;
1461 }
1462
1463 /*
1464  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1465  * If the user configures more, place BE in vlan promiscuous mode.
1466  */
1467 static int be_vid_config(struct be_adapter *adapter)
1468 {
1469         struct device *dev = &adapter->pdev->dev;
1470         u16 vids[BE_NUM_VLANS_SUPPORTED];
1471         u16 num = 0, i = 0;
1472         int status = 0;
1473
1474         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1475         if (adapter->netdev->flags & IFF_PROMISC)
1476                 return 0;
1477
1478         if (adapter->vlans_added > be_max_vlans(adapter))
1479                 return be_set_vlan_promisc(adapter);
1480
1481         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1482                 status = be_clear_vlan_promisc(adapter);
1483                 if (status)
1484                         return status;
1485         }
1486         /* Construct VLAN Table to give to HW */
1487         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1488                 vids[num++] = cpu_to_le16(i);
1489
1490         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1491         if (status) {
1492                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1493                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1494                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1495                     addl_status(status) ==
1496                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1497                         return be_set_vlan_promisc(adapter);
1498         }
1499         return status;
1500 }
1501
1502 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1503 {
1504         struct be_adapter *adapter = netdev_priv(netdev);
1505         int status = 0;
1506
1507         mutex_lock(&adapter->rx_filter_lock);
1508
1509         /* Packets with VID 0 are always received by Lancer by default */
1510         if (lancer_chip(adapter) && vid == 0)
1511                 goto done;
1512
1513         if (test_bit(vid, adapter->vids))
1514                 goto done;
1515
1516         set_bit(vid, adapter->vids);
1517         adapter->vlans_added++;
1518
1519         status = be_vid_config(adapter);
1520 done:
1521         mutex_unlock(&adapter->rx_filter_lock);
1522         return status;
1523 }
1524
1525 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1526 {
1527         struct be_adapter *adapter = netdev_priv(netdev);
1528         int status = 0;
1529
1530         mutex_lock(&adapter->rx_filter_lock);
1531
1532         /* Packets with VID 0 are always received by Lancer by default */
1533         if (lancer_chip(adapter) && vid == 0)
1534                 goto done;
1535
1536         if (!test_bit(vid, adapter->vids))
1537                 goto done;
1538
1539         clear_bit(vid, adapter->vids);
1540         adapter->vlans_added--;
1541
1542         status = be_vid_config(adapter);
1543 done:
1544         mutex_unlock(&adapter->rx_filter_lock);
1545         return status;
1546 }
1547
1548 static void be_set_all_promisc(struct be_adapter *adapter)
1549 {
1550         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1551         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1552 }
1553
1554 static void be_set_mc_promisc(struct be_adapter *adapter)
1555 {
1556         int status;
1557
1558         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1559                 return;
1560
1561         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1562         if (!status)
1563                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1564 }
1565
1566 static void be_set_uc_promisc(struct be_adapter *adapter)
1567 {
1568         int status;
1569
1570         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1571                 return;
1572
1573         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1574         if (!status)
1575                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1576 }
1577
1578 static void be_clear_uc_promisc(struct be_adapter *adapter)
1579 {
1580         int status;
1581
1582         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1583                 return;
1584
1585         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1586         if (!status)
1587                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1588 }
1589
1590 /* The two functions below are the callback args for __dev_mc_sync()/
1591  * __dev_uc_sync(). A single callback is used for both sync and unsync; it
1592  * does not actually add/remove addresses, but only flags that the uc/mc
1593  * list has changed. The entire uc/mc list is programmed in be_set_rx_mode().
1594  */
1595 static int be_uc_list_update(struct net_device *netdev,
1596                              const unsigned char *addr)
1597 {
1598         struct be_adapter *adapter = netdev_priv(netdev);
1599
1600         adapter->update_uc_list = true;
1601         return 0;
1602 }
1603
1604 static int be_mc_list_update(struct net_device *netdev,
1605                              const unsigned char *addr)
1606 {
1607         struct be_adapter *adapter = netdev_priv(netdev);
1608
1609         adapter->update_mc_list = true;
1610         return 0;
1611 }
1612
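/* Reprogram the HW multicast filter. Under the netif-addr lock the netdev
 * mc-list is synced and a decision is made: switch to mc-promisc (IFF_ALLMULTI
 * set or more addresses than be_max_mc() allows), reprogram the cached
 * mc-list, or leave things as they are. The rx-filter commands themselves are
 * issued after dropping the lock, falling back to mc-promisc if programming
 * the multicast list fails.
 */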
1613 static void be_set_mc_list(struct be_adapter *adapter)
1614 {
1615         struct net_device *netdev = adapter->netdev;
1616         struct netdev_hw_addr *ha;
1617         bool mc_promisc = false;
1618         int status;
1619
1620         netif_addr_lock_bh(netdev);
1621         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1622
1623         if (netdev->flags & IFF_PROMISC) {
1624                 adapter->update_mc_list = false;
1625         } else if (netdev->flags & IFF_ALLMULTI ||
1626                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1627                 /* Enable multicast promisc if the number of configured
1628                  * addresses exceeds what we support
1629                  */
1630                 mc_promisc = true;
1631                 adapter->update_mc_list = false;
1632         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1633                 /* Update mc-list unconditionally if the iface was previously
1634                  * in mc-promisc mode and now is out of that mode.
1635                  */
1636                 adapter->update_mc_list = true;
1637         }
1638
1639         if (adapter->update_mc_list) {
1640                 int i = 0;
1641
1642                 /* cache the mc-list in adapter */
1643                 netdev_for_each_mc_addr(ha, netdev) {
1644                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1645                         i++;
1646                 }
1647                 adapter->mc_count = netdev_mc_count(netdev);
1648         }
1649         netif_addr_unlock_bh(netdev);
1650
1651         if (mc_promisc) {
1652                 be_set_mc_promisc(adapter);
1653         } else if (adapter->update_mc_list) {
1654                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1655                 if (!status)
1656                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1657                 else
1658                         be_set_mc_promisc(adapter);
1659
1660                 adapter->update_mc_list = false;
1661         }
1662 }
1663
1664 static void be_clear_mc_list(struct be_adapter *adapter)
1665 {
1666         struct net_device *netdev = adapter->netdev;
1667
1668         __dev_mc_unsync(netdev, NULL);
1669         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1670         adapter->mc_count = 0;
1671 }
1672
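/* Program one cached unicast address as a MAC filter. To match how
 * be_set_uc_list() caches addresses (slot 0 is reserved for the primary MAC),
 * the entry for uc_idx is read from uc_list[uc_idx + 1] and its pmac handle is
 * stored in pmac_id[uc_idx + 1]. If the address equals the adapter's own MAC,
 * the primary pmac_id is reused instead of consuming another filter entry.
 */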
1673 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1674 {
1675         if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1676                              adapter->dev_mac)) {
1677                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1678                 return 0;
1679         }
1680
1681         return be_cmd_pmac_add(adapter,
1682                                (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1683                                adapter->if_handle,
1684                                &adapter->pmac_id[uc_idx + 1], 0);
1685 }
1686
1687 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1688 {
1689         if (pmac_id == adapter->pmac_id[0])
1690                 return;
1691
1692         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1693 }
1694
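/* Reprogram the HW unicast filters. Under the netif-addr lock the netdev
 * uc-list is synced and cached (slot 0 is reserved for the primary MAC); if
 * there are more addresses than filters available, fall back to uc-promisc.
 * Outside the lock, either enable uc-promisc or delete the old pmac entries
 * and re-add one pmac entry per cached address.
 */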
1695 static void be_set_uc_list(struct be_adapter *adapter)
1696 {
1697         struct net_device *netdev = adapter->netdev;
1698         struct netdev_hw_addr *ha;
1699         bool uc_promisc = false;
1700         int curr_uc_macs = 0, i;
1701
1702         netif_addr_lock_bh(netdev);
1703         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1704
1705         if (netdev->flags & IFF_PROMISC) {
1706                 adapter->update_uc_list = false;
1707         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1708                 uc_promisc = true;
1709                 adapter->update_uc_list = false;
1710         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1711                 /* Update uc-list unconditionally if the iface was previously
1712                  * in uc-promisc mode and now is out of that mode.
1713                  */
1714                 adapter->update_uc_list = true;
1715         }
1716
1717         if (adapter->update_uc_list) {
1718                 i = 1; /* First slot is claimed by the Primary MAC */
1719
1720                 /* cache the uc-list in adapter array */
1721                 netdev_for_each_uc_addr(ha, netdev) {
1722                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1723                         i++;
1724                 }
1725                 curr_uc_macs = netdev_uc_count(netdev);
1726         }
1727         netif_addr_unlock_bh(netdev);
1728
1729         if (uc_promisc) {
1730                 be_set_uc_promisc(adapter);
1731         } else if (adapter->update_uc_list) {
1732                 be_clear_uc_promisc(adapter);
1733
1734                 for (i = 0; i < adapter->uc_macs; i++)
1735                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1736
1737                 for (i = 0; i < curr_uc_macs; i++)
1738                         be_uc_mac_add(adapter, i);
1739                 adapter->uc_macs = curr_uc_macs;
1740                 adapter->update_uc_list = false;
1741         }
1742 }
1743
1744 static void be_clear_uc_list(struct be_adapter *adapter)
1745 {
1746         struct net_device *netdev = adapter->netdev;
1747         int i;
1748
1749         __dev_uc_unsync(netdev, NULL);
1750         for (i = 0; i < adapter->uc_macs; i++)
1751                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1752
1753         adapter->uc_macs = 0;
1754 }
1755
1756 static void __be_set_rx_mode(struct be_adapter *adapter)
1757 {
1758         struct net_device *netdev = adapter->netdev;
1759
1760         mutex_lock(&adapter->rx_filter_lock);
1761
1762         if (netdev->flags & IFF_PROMISC) {
1763                 if (!be_in_all_promisc(adapter))
1764                         be_set_all_promisc(adapter);
1765         } else if (be_in_all_promisc(adapter)) {
1766                 /* We need to re-program the vlan-list or clear
1767                  * vlan-promisc mode (if needed) when the interface
1768                  * comes out of promisc mode.
1769                  */
1770                 be_vid_config(adapter);
1771         }
1772
1773         be_set_uc_list(adapter);
1774         be_set_mc_list(adapter);
1775
1776         mutex_unlock(&adapter->rx_filter_lock);
1777 }
1778
1779 static void be_work_set_rx_mode(struct work_struct *work)
1780 {
1781         struct be_cmd_work *cmd_work =
1782                                 container_of(work, struct be_cmd_work, work);
1783
1784         __be_set_rx_mode(cmd_work->adapter);
1785         kfree(cmd_work);
1786 }
1787
1788 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1789 {
1790         struct be_adapter *adapter = netdev_priv(netdev);
1791         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1792         int status;
1793
1794         if (!sriov_enabled(adapter))
1795                 return -EPERM;
1796
1797         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1798                 return -EINVAL;
1799
1800         /* Proceed further only if user provided MAC is different
1801          * from active MAC
1802          */
1803         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1804                 return 0;
1805
1806         if (BEx_chip(adapter)) {
1807                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1808                                 vf + 1);
1809
1810                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1811                                          &vf_cfg->pmac_id, vf + 1);
1812         } else {
1813                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1814                                         vf + 1);
1815         }
1816
1817         if (status) {
1818                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1819                         mac, vf, status);
1820                 return be_cmd_status(status);
1821         }
1822
1823         ether_addr_copy(vf_cfg->mac_addr, mac);
1824
1825         return 0;
1826 }
1827
1828 static int be_get_vf_config(struct net_device *netdev, int vf,
1829                             struct ifla_vf_info *vi)
1830 {
1831         struct be_adapter *adapter = netdev_priv(netdev);
1832         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1833
1834         if (!sriov_enabled(adapter))
1835                 return -EPERM;
1836
1837         if (vf >= adapter->num_vfs)
1838                 return -EINVAL;
1839
1840         vi->vf = vf;
1841         vi->max_tx_rate = vf_cfg->tx_rate;
1842         vi->min_tx_rate = 0;
1843         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1844         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1845         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1846         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1847         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1848
1849         return 0;
1850 }
1851
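/* Enable Transparent VLAN Tagging (TVT) for a VF: program the switch to tag
 * the VF's traffic with 'vlan', clear any guest-programmed VLAN filters and
 * revoke the VF's FILTMGMT privilege so it cannot program its own VLAN
 * filters while TVT is in effect.
 */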
1852 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1853 {
1854         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1855         u16 vids[BE_NUM_VLANS_SUPPORTED];
1856         int vf_if_id = vf_cfg->if_handle;
1857         int status;
1858
1859         /* Enable Transparent VLAN Tagging */
1860         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1861         if (status)
1862                 return status;
1863
1864         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1865         vids[0] = 0;
1866         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1867         if (!status)
1868                 dev_info(&adapter->pdev->dev,
1869                          "Cleared guest VLANs on VF%d", vf);
1870
1871         /* After TVT is enabled, disallow the VF from programming VLAN filters */
1872         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1873                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1874                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1875                 if (!status)
1876                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1877         }
1878         return 0;
1879 }
1880
1881 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1882 {
1883         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1884         struct device *dev = &adapter->pdev->dev;
1885         int status;
1886
1887         /* Reset Transparent VLAN Tagging. */
1888         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1889                                        vf_cfg->if_handle, 0, 0);
1890         if (status)
1891                 return status;
1892
1893         /* Allow VFs to program VLAN filtering */
1894         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1895                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1896                                                   BE_PRIV_FILTMGMT, vf + 1);
1897                 if (!status) {
1898                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1899                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1900                 }
1901         }
1902
1903         dev_info(dev,
1904                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1905         return 0;
1906 }
1907
1908 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1909                           __be16 vlan_proto)
1910 {
1911         struct be_adapter *adapter = netdev_priv(netdev);
1912         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1913         int status;
1914
1915         if (!sriov_enabled(adapter))
1916                 return -EPERM;
1917
1918         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1919                 return -EINVAL;
1920
1921         if (vlan_proto != htons(ETH_P_8021Q))
1922                 return -EPROTONOSUPPORT;
1923
1924         if (vlan || qos) {
1925                 vlan |= qos << VLAN_PRIO_SHIFT;
1926                 status = be_set_vf_tvt(adapter, vf, vlan);
1927         } else {
1928                 status = be_clear_vf_tvt(adapter, vf);
1929         }
1930
1931         if (status) {
1932                 dev_err(&adapter->pdev->dev,
1933                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1934                         status);
1935                 return be_cmd_status(status);
1936         }
1937
1938         vf_cfg->vlan_tag = vlan;
1939         return 0;
1940 }
1941
1942 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1943                              int min_tx_rate, int max_tx_rate)
1944 {
1945         struct be_adapter *adapter = netdev_priv(netdev);
1946         struct device *dev = &adapter->pdev->dev;
1947         int percent_rate, status = 0;
1948         u16 link_speed = 0;
1949         u8 link_status;
1950
1951         if (!sriov_enabled(adapter))
1952                 return -EPERM;
1953
1954         if (vf >= adapter->num_vfs)
1955                 return -EINVAL;
1956
1957         if (min_tx_rate)
1958                 return -EINVAL;
1959
1960         if (!max_tx_rate)
1961                 goto config_qos;
1962
1963         status = be_cmd_link_status_query(adapter, &link_speed,
1964                                           &link_status, 0);
1965         if (status)
1966                 goto err;
1967
1968         if (!link_status) {
1969                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1970                 status = -ENETDOWN;
1971                 goto err;
1972         }
1973
1974         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1975                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1976                         link_speed);
1977                 status = -EINVAL;
1978                 goto err;
1979         }
1980
1981         /* On Skyhawk the QOS setting must be done only as a % value */
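        /* e.g. on a 10000 Mbps link, percent_rate is 100, so max_tx_rate
         * must be a multiple of 100 Mbps
         */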
1982         percent_rate = link_speed / 100;
1983         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1984                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1985                         percent_rate);
1986                 status = -EINVAL;
1987                 goto err;
1988         }
1989
1990 config_qos:
1991         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1992         if (status)
1993                 goto err;
1994
1995         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1996         return 0;
1997
1998 err:
1999         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2000                 max_tx_rate, vf);
2001         return be_cmd_status(status);
2002 }
2003
2004 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2005                                 int link_state)
2006 {
2007         struct be_adapter *adapter = netdev_priv(netdev);
2008         int status;
2009
2010         if (!sriov_enabled(adapter))
2011                 return -EPERM;
2012
2013         if (vf >= adapter->num_vfs)
2014                 return -EINVAL;
2015
2016         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2017         if (status) {
2018                 dev_err(&adapter->pdev->dev,
2019                         "Link state change on VF %d failed: %#x\n", vf, status);
2020                 return be_cmd_status(status);
2021         }
2022
2023         adapter->vf_cfg[vf].plink_tracking = link_state;
2024
2025         return 0;
2026 }
2027
2028 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2029 {
2030         struct be_adapter *adapter = netdev_priv(netdev);
2031         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2032         u8 spoofchk;
2033         int status;
2034
2035         if (!sriov_enabled(adapter))
2036                 return -EPERM;
2037
2038         if (vf >= adapter->num_vfs)
2039                 return -EINVAL;
2040
2041         if (BEx_chip(adapter))
2042                 return -EOPNOTSUPP;
2043
2044         if (enable == vf_cfg->spoofchk)
2045                 return 0;
2046
2047         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2048
2049         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2050                                        0, spoofchk);
2051         if (status) {
2052                 dev_err(&adapter->pdev->dev,
2053                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2054                 return be_cmd_status(status);
2055         }
2056
2057         vf_cfg->spoofchk = enable;
2058         return 0;
2059 }
2060
2061 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2062                           ulong now)
2063 {
2064         aic->rx_pkts_prev = rx_pkts;
2065         aic->tx_reqs_prev = tx_pkts;
2066         aic->jiffies = now;
2067 }
2068
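/* Compute a new EQ delay (in usecs) for adaptive interrupt coalescing. The
 * rx+tx packet rate since the last sample is converted to pkts/sec and mapped
 * to a delay as eqd = (pps / 15000) << 2; e.g. ~60000 pkts/sec yields an eqd
 * of 16. Rates below ~30000 pkts/sec (eqd < 8) disable coalescing, and the
 * result is clamped to the configured min/max. When adaptive coalescing is
 * disabled, the static ethtool-configured value (et_eqd) is returned instead.
 */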
2069 static int be_get_new_eqd(struct be_eq_obj *eqo)
2070 {
2071         struct be_adapter *adapter = eqo->adapter;
2072         int eqd, start;
2073         struct be_aic_obj *aic;
2074         struct be_rx_obj *rxo;
2075         struct be_tx_obj *txo;
2076         u64 rx_pkts = 0, tx_pkts = 0;
2077         ulong now;
2078         u32 pps, delta;
2079         int i;
2080
2081         aic = &adapter->aic_obj[eqo->idx];
2082         if (!aic->enable) {
2083                 if (aic->jiffies)
2084                         aic->jiffies = 0;
2085                 eqd = aic->et_eqd;
2086                 return eqd;
2087         }
2088
2089         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2090                 do {
2091                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2092                         rx_pkts += rxo->stats.rx_pkts;
2093                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2094         }
2095
2096         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2097                 do {
2098                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2099                         tx_pkts += txo->stats.tx_reqs;
2100                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2101         }
2102
2103         /* Skip if the counters wrapped around or this is the first calculation */
2104         now = jiffies;
2105         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2106             rx_pkts < aic->rx_pkts_prev ||
2107             tx_pkts < aic->tx_reqs_prev) {
2108                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2109                 return aic->prev_eqd;
2110         }
2111
2112         delta = jiffies_to_msecs(now - aic->jiffies);
2113         if (delta == 0)
2114                 return aic->prev_eqd;
2115
2116         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2117                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2118         eqd = (pps / 15000) << 2;
2119
2120         if (eqd < 8)
2121                 eqd = 0;
2122         eqd = min_t(u32, eqd, aic->max_eqd);
2123         eqd = max_t(u32, eqd, aic->min_eqd);
2124
2125         be_aic_update(aic, rx_pkts, tx_pkts, now);
2126
2127         return eqd;
2128 }
2129
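/* Map the current EQ delay to one of the R2I_DLY_ENC_* delay encodings that
 * the caller applies when re-arming the EQ. If less than a millisecond has
 * passed since the last sample, the previous eqd is reused rather than being
 * recomputed.
 */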
2130 /* For Skyhawk-R only */
2131 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2132 {
2133         struct be_adapter *adapter = eqo->adapter;
2134         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2135         ulong now = jiffies;
2136         int eqd;
2137         u32 mult_enc;
2138
2139         if (!aic->enable)
2140                 return 0;
2141
2142         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2143                 eqd = aic->prev_eqd;
2144         else
2145                 eqd = be_get_new_eqd(eqo);
2146
2147         if (eqd > 100)
2148                 mult_enc = R2I_DLY_ENC_1;
2149         else if (eqd > 60)
2150                 mult_enc = R2I_DLY_ENC_2;
2151         else if (eqd > 20)
2152                 mult_enc = R2I_DLY_ENC_3;
2153         else
2154                 mult_enc = R2I_DLY_ENC_0;
2155
2156         aic->prev_eqd = eqd;
2157
2158         return mult_enc;
2159 }
2160
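/* Recompute the EQ delay for every event queue and issue a single modify-EQD
 * command covering the EQs whose delay actually changed (or all of them when
 * force_update is set). The delay in usecs is scaled by 65/100 into the
 * delay-multiplier units the FW command expects.
 */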
2161 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2162 {
2163         struct be_set_eqd set_eqd[MAX_EVT_QS];
2164         struct be_aic_obj *aic;
2165         struct be_eq_obj *eqo;
2166         int i, num = 0, eqd;
2167
2168         for_all_evt_queues(adapter, eqo, i) {
2169                 aic = &adapter->aic_obj[eqo->idx];
2170                 eqd = be_get_new_eqd(eqo);
2171                 if (force_update || eqd != aic->prev_eqd) {
2172                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2173                         set_eqd[num].eq_id = eqo->q.id;
2174                         aic->prev_eqd = eqd;
2175                         num++;
2176                 }
2177         }
2178
2179         if (num)
2180                 be_cmd_modify_eqd(adapter, set_eqd, num);
2181 }
2182
2183 static void be_rx_stats_update(struct be_rx_obj *rxo,
2184                                struct be_rx_compl_info *rxcp)
2185 {
2186         struct be_rx_stats *stats = rx_stats(rxo);
2187
2188         u64_stats_update_begin(&stats->sync);
2189         stats->rx_compl++;
2190         stats->rx_bytes += rxcp->pkt_size;
2191         stats->rx_pkts++;
2192         if (rxcp->tunneled)
2193                 stats->rx_vxlan_offload_pkts++;
2194         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2195                 stats->rx_mcast_pkts++;
2196         if (rxcp->err)
2197                 stats->rx_compl_err++;
2198         u64_stats_update_end(&stats->sync);
2199 }
2200
2201 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2202 {
2203         /* L4 checksum is not reliable for non-TCP/UDP packets.
2204          * Also ignore ipcksm for IPv6 pkts.
2205          */
2206         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2207                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2208 }
2209
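/* Pop the page_info for the RX descriptor at the queue tail. The backing page
 * is DMA-unmapped only when this fragment is the last one carved from it;
 * otherwise the fragment is just synced for CPU access, since other fragments
 * of the same mapped page may still be in use.
 */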
2210 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2211 {
2212         struct be_adapter *adapter = rxo->adapter;
2213         struct be_rx_page_info *rx_page_info;
2214         struct be_queue_info *rxq = &rxo->q;
2215         u32 frag_idx = rxq->tail;
2216
2217         rx_page_info = &rxo->page_info_tbl[frag_idx];
2218         BUG_ON(!rx_page_info->page);
2219
2220         if (rx_page_info->last_frag) {
2221                 dma_unmap_page(&adapter->pdev->dev,
2222                                dma_unmap_addr(rx_page_info, bus),
2223                                adapter->big_page_size, DMA_FROM_DEVICE);
2224                 rx_page_info->last_frag = false;
2225         } else {
2226                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2227                                         dma_unmap_addr(rx_page_info, bus),
2228                                         rx_frag_size, DMA_FROM_DEVICE);
2229         }
2230
2231         queue_tail_inc(rxq);
2232         atomic_dec(&rxq->used);
2233         return rx_page_info;
2234 }
2235
2236 /* Throw away the data in the Rx completion */
2237 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2238                                 struct be_rx_compl_info *rxcp)
2239 {
2240         struct be_rx_page_info *page_info;
2241         u16 i, num_rcvd = rxcp->num_rcvd;
2242
2243         for (i = 0; i < num_rcvd; i++) {
2244                 page_info = get_rx_page_info(rxo);
2245                 put_page(page_info->page);
2246                 memset(page_info, 0, sizeof(*page_info));
2247         }
2248 }
2249
2250 /*
2251  * skb_fill_rx_data forms a complete skb for an ether frame
2252  * indicated by rxcp.
2253  */
2254 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2255                              struct be_rx_compl_info *rxcp)
2256 {
2257         struct be_rx_page_info *page_info;
2258         u16 i, j;
2259         u16 hdr_len, curr_frag_len, remaining;
2260         u8 *start;
2261
2262         page_info = get_rx_page_info(rxo);
2263         start = page_address(page_info->page) + page_info->page_offset;
2264         prefetch(start);
2265
2266         /* Copy data in the first descriptor of this completion */
2267         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2268
2269         skb->len = curr_frag_len;
2270         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2271                 memcpy(skb->data, start, curr_frag_len);
2272                 /* Complete packet has now been moved to data */
2273                 put_page(page_info->page);
2274                 skb->data_len = 0;
2275                 skb->tail += curr_frag_len;
2276         } else {
2277                 hdr_len = ETH_HLEN;
2278                 memcpy(skb->data, start, hdr_len);
2279                 skb_shinfo(skb)->nr_frags = 1;
2280                 skb_frag_set_page(skb, 0, page_info->page);
2281                 skb_shinfo(skb)->frags[0].page_offset =
2282                                         page_info->page_offset + hdr_len;
2283                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2284                                   curr_frag_len - hdr_len);
2285                 skb->data_len = curr_frag_len - hdr_len;
2286                 skb->truesize += rx_frag_size;
2287                 skb->tail += hdr_len;
2288         }
2289         page_info->page = NULL;
2290
2291         if (rxcp->pkt_size <= rx_frag_size) {
2292                 BUG_ON(rxcp->num_rcvd != 1);
2293                 return;
2294         }
2295
2296         /* More frags present for this completion */
2297         remaining = rxcp->pkt_size - curr_frag_len;
2298         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2299                 page_info = get_rx_page_info(rxo);
2300                 curr_frag_len = min(remaining, rx_frag_size);
2301
2302                 /* Coalesce all frags from the same physical page in one slot */
2303                 if (page_info->page_offset == 0) {
2304                         /* Fresh page */
2305                         j++;
2306                         skb_frag_set_page(skb, j, page_info->page);
2307                         skb_shinfo(skb)->frags[j].page_offset =
2308                                                         page_info->page_offset;
2309                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2310                         skb_shinfo(skb)->nr_frags++;
2311                 } else {
2312                         put_page(page_info->page);
2313                 }
2314
2315                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2316                 skb->len += curr_frag_len;
2317                 skb->data_len += curr_frag_len;
2318                 skb->truesize += rx_frag_size;
2319                 remaining -= curr_frag_len;
2320                 page_info->page = NULL;
2321         }
2322         BUG_ON(j > MAX_SKB_FRAGS);
2323 }
2324
2325 /* Process the RX completion indicated by rxcp when GRO is disabled */
2326 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2327                                 struct be_rx_compl_info *rxcp)
2328 {
2329         struct be_adapter *adapter = rxo->adapter;
2330         struct net_device *netdev = adapter->netdev;
2331         struct sk_buff *skb;
2332
2333         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2334         if (unlikely(!skb)) {
2335                 rx_stats(rxo)->rx_drops_no_skbs++;
2336                 be_rx_compl_discard(rxo, rxcp);
2337                 return;
2338         }
2339
2340         skb_fill_rx_data(rxo, skb, rxcp);
2341
2342         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2343                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2344         else
2345                 skb_checksum_none_assert(skb);
2346
2347         skb->protocol = eth_type_trans(skb, netdev);
2348         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2349         if (netdev->features & NETIF_F_RXHASH)
2350                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2351
2352         skb->csum_level = rxcp->tunneled;
2353         skb_mark_napi_id(skb, napi);
2354
2355         if (rxcp->vlanf)
2356                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2357
2358         netif_receive_skb(skb);
2359 }
2360
2361 /* Process the RX completion indicated by rxcp when GRO is enabled */
2362 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2363                                     struct napi_struct *napi,
2364                                     struct be_rx_compl_info *rxcp)
2365 {
2366         struct be_adapter *adapter = rxo->adapter;
2367         struct be_rx_page_info *page_info;
2368         struct sk_buff *skb = NULL;
2369         u16 remaining, curr_frag_len;
2370         u16 i, j;
2371
2372         skb = napi_get_frags(napi);
2373         if (!skb) {
2374                 be_rx_compl_discard(rxo, rxcp);
2375                 return;
2376         }
2377
2378         remaining = rxcp->pkt_size;
2379         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2380                 page_info = get_rx_page_info(rxo);
2381
2382                 curr_frag_len = min(remaining, rx_frag_size);
2383
2384                 /* Coalesce all frags from the same physical page in one slot */
2385                 if (i == 0 || page_info->page_offset == 0) {
2386                         /* First frag or Fresh page */
2387                         j++;
2388                         skb_frag_set_page(skb, j, page_info->page);
2389                         skb_shinfo(skb)->frags[j].page_offset =
2390                                                         page_info->page_offset;
2391                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2392                 } else {
2393                         put_page(page_info->page);
2394                 }
2395                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2396                 skb->truesize += rx_frag_size;
2397                 remaining -= curr_frag_len;
2398                 memset(page_info, 0, sizeof(*page_info));
2399         }
2400         BUG_ON(j > MAX_SKB_FRAGS);
2401
2402         skb_shinfo(skb)->nr_frags = j + 1;
2403         skb->len = rxcp->pkt_size;
2404         skb->data_len = rxcp->pkt_size;
2405         skb->ip_summed = CHECKSUM_UNNECESSARY;
2406         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2407         if (adapter->netdev->features & NETIF_F_RXHASH)
2408                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2409
2410         skb->csum_level = rxcp->tunneled;
2411
2412         if (rxcp->vlanf)
2413                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2414
2415         napi_gro_frags(napi);
2416 }
2417
2418 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2419                                  struct be_rx_compl_info *rxcp)
2420 {
2421         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2422         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2423         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2424         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2425         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2426         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2427         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2428         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2429         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2430         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2431         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2432         if (rxcp->vlanf) {
2433                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2434                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2435         }
2436         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2437         rxcp->tunneled =
2438                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2439 }
2440
2441 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2442                                  struct be_rx_compl_info *rxcp)
2443 {
2444         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2445         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2446         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2447         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2448         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2449         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2450         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2451         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2452         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2453         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2454         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2455         if (rxcp->vlanf) {
2456                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2457                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2458         }
2459         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2460         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2461 }
2462
2463 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2464 {
2465         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2466         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2467         struct be_adapter *adapter = rxo->adapter;
2468
2469         /* For checking the valid bit it is Ok to use either definition as the
2470          * valid bit is at the same position in both v0 and v1 Rx compl */
2471         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2472                 return NULL;
2473
2474         rmb();
2475         be_dws_le_to_cpu(compl, sizeof(*compl));
2476
2477         if (adapter->be3_native)
2478                 be_parse_rx_compl_v1(compl, rxcp);
2479         else
2480                 be_parse_rx_compl_v0(compl, rxcp);
2481
2482         if (rxcp->ip_frag)
2483                 rxcp->l4_csum = 0;
2484
2485         if (rxcp->vlanf) {
2486                 /* In QNQ modes, if qnq bit is not set, then the packet was
2487                  * tagged only with the transparent outer vlan-tag and must
2488                  * not be treated as a vlan packet by host
2489                  */
2490                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2491                         rxcp->vlanf = 0;
2492
2493                 if (!lancer_chip(adapter))
2494                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2495
2496                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2497                     !test_bit(rxcp->vlan_tag, adapter->vids))
2498                         rxcp->vlanf = 0;
2499         }
2500
2501         /* As the compl has been parsed, reset it; we won't touch it again */
2502         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2503
2504         queue_tail_inc(&rxo->cq);
2505         return rxcp;
2506 }
2507
2508 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2509 {
2510         u32 order = get_order(size);
2511
2512         if (order > 0)
2513                 gfp |= __GFP_COMP;
2514         return  alloc_pages(gfp, order);
2515 }
2516
2517 /*
2518  * Allocate a page, split it into fragments of size rx_frag_size and post as
2519  * receive buffers to BE
2520  */
2521 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2522 {
2523         struct be_adapter *adapter = rxo->adapter;
2524         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2525         struct be_queue_info *rxq = &rxo->q;
2526         struct page *pagep = NULL;
2527         struct device *dev = &adapter->pdev->dev;
2528         struct be_eth_rx_d *rxd;
2529         u64 page_dmaaddr = 0, frag_dmaaddr;
2530         u32 posted, page_offset = 0, notify = 0;
2531
2532         page_info = &rxo->page_info_tbl[rxq->head];
2533         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2534                 if (!pagep) {
2535                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2536                         if (unlikely(!pagep)) {
2537                                 rx_stats(rxo)->rx_post_fail++;
2538                                 break;
2539                         }
2540                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2541                                                     adapter->big_page_size,
2542                                                     DMA_FROM_DEVICE);
2543                         if (dma_mapping_error(dev, page_dmaaddr)) {
2544                                 put_page(pagep);
2545                                 pagep = NULL;
2546                                 adapter->drv_stats.dma_map_errors++;
2547                                 break;
2548                         }
2549                         page_offset = 0;
2550                 } else {
2551                         get_page(pagep);
2552                         page_offset += rx_frag_size;
2553                 }
2554                 page_info->page_offset = page_offset;
2555                 page_info->page = pagep;
2556
2557                 rxd = queue_head_node(rxq);
2558                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2559                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2560                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2561
2562                 /* Any space left in the current big page for another frag? */
2563                 if ((page_offset + rx_frag_size + rx_frag_size) >
2564                                         adapter->big_page_size) {
2565                         pagep = NULL;
2566                         page_info->last_frag = true;
2567                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2568                 } else {
2569                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2570                 }
2571
2572                 prev_page_info = page_info;
2573                 queue_head_inc(rxq);
2574                 page_info = &rxo->page_info_tbl[rxq->head];
2575         }
2576
2577         /* Mark the last frag of a page when we break out of the above loop
2578          * with no more slots available in the RXQ
2579          */
2580         if (pagep) {
2581                 prev_page_info->last_frag = true;
2582                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2583         }
2584
2585         if (posted) {
2586                 atomic_add(posted, &rxq->used);
2587                 if (rxo->rx_post_starved)
2588                         rxo->rx_post_starved = false;
2589                 do {
2590                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2591                         be_rxq_notify(adapter, rxq->id, notify);
2592                         posted -= notify;
2593                 } while (posted);
2594         } else if (atomic_read(&rxq->used) == 0) {
2595                 /* Let be_worker replenish when memory is available */
2596                 rxo->rx_post_starved = true;
2597         }
2598 }
2599
2600 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2601 {
2602         struct be_queue_info *tx_cq = &txo->cq;
2603         struct be_tx_compl_info *txcp = &txo->txcp;
2604         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2605
2606         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2607                 return NULL;
2608
2609         /* Ensure load ordering of valid bit dword and other dwords below */
2610         rmb();
2611         be_dws_le_to_cpu(compl, sizeof(*compl));
2612
2613         txcp->status = GET_TX_COMPL_BITS(status, compl);
2614         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2615
2616         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2617         queue_tail_inc(tx_cq);
2618         return txcp;
2619 }
2620
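/* Reclaim TX WRBs up to and including last_index (taken from a TX completion).
 * A header WRB marks the start of a request: its skb is picked up there and
 * freed once its WRBs have been walked, and every fragment WRB is DMA-unmapped
 * along the way. Returns the number of WRBs processed so the caller can credit
 * them back to the TXQ.
 */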
2621 static u16 be_tx_compl_process(struct be_adapter *adapter,
2622                                struct be_tx_obj *txo, u16 last_index)
2623 {
2624         struct sk_buff **sent_skbs = txo->sent_skb_list;
2625         struct be_queue_info *txq = &txo->q;
2626         struct sk_buff *skb = NULL;
2627         bool unmap_skb_hdr = false;
2628         struct be_eth_wrb *wrb;
2629         u16 num_wrbs = 0;
2630         u32 frag_index;
2631
2632         do {
2633                 if (sent_skbs[txq->tail]) {
2634                         /* Free skb from prev req */
2635                         if (skb)
2636                                 dev_consume_skb_any(skb);
2637                         skb = sent_skbs[txq->tail];
2638                         sent_skbs[txq->tail] = NULL;
2639                         queue_tail_inc(txq);  /* skip hdr wrb */
2640                         num_wrbs++;
2641                         unmap_skb_hdr = true;
2642                 }
2643                 wrb = queue_tail_node(txq);
2644                 frag_index = txq->tail;
2645                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2646                               (unmap_skb_hdr && skb_headlen(skb)));
2647                 unmap_skb_hdr = false;
2648                 queue_tail_inc(txq);
2649                 num_wrbs++;
2650         } while (frag_index != last_index);
2651         dev_consume_skb_any(skb);
2652
2653         return num_wrbs;
2654 }
2655
2656 /* Return the number of events in the event queue */
2657 static inline int events_get(struct be_eq_obj *eqo)
2658 {
2659         struct be_eq_entry *eqe;
2660         int num = 0;
2661
2662         do {
2663                 eqe = queue_tail_node(&eqo->q);
2664                 if (eqe->evt == 0)
2665                         break;
2666
2667                 rmb();
2668                 eqe->evt = 0;
2669                 num++;
2670                 queue_tail_inc(&eqo->q);
2671         } while (true);
2672
2673         return num;
2674 }
2675
2676 /* Leaves the EQ in disarmed state */
2677 static void be_eq_clean(struct be_eq_obj *eqo)
2678 {
2679         int num = events_get(eqo);
2680
2681         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2682 }
2683
2684 /* Free posted rx buffers that were not used */
2685 static void be_rxq_clean(struct be_rx_obj *rxo)
2686 {
2687         struct be_queue_info *rxq = &rxo->q;
2688         struct be_rx_page_info *page_info;
2689
2690         while (atomic_read(&rxq->used) > 0) {
2691                 page_info = get_rx_page_info(rxo);
2692                 put_page(page_info->page);
2693                 memset(page_info, 0, sizeof(*page_info));
2694         }
2695         BUG_ON(atomic_read(&rxq->used));
2696         rxq->tail = 0;
2697         rxq->head = 0;
2698 }
2699
2700 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2701 {
2702         struct be_queue_info *rx_cq = &rxo->cq;
2703         struct be_rx_compl_info *rxcp;
2704         struct be_adapter *adapter = rxo->adapter;
2705         int flush_wait = 0;
2706
2707         /* Consume pending rx completions.
2708          * Wait for the flush completion (identified by zero num_rcvd)
2709          * to arrive. Notify CQ even when there are no more CQ entries
2710          * for HW to flush partially coalesced CQ entries.
2711          * In Lancer, there is no need to wait for flush compl.
2712          */
2713         for (;;) {
2714                 rxcp = be_rx_compl_get(rxo);
2715                 if (!rxcp) {
2716                         if (lancer_chip(adapter))
2717                                 break;
2718
2719                         if (flush_wait++ > 50 ||
2720                             be_check_error(adapter,
2721                                            BE_ERROR_HW)) {
2722                                 dev_warn(&adapter->pdev->dev,
2723                                          "did not receive flush compl\n");
2724                                 break;
2725                         }
2726                         be_cq_notify(adapter, rx_cq->id, true, 0);
2727                         mdelay(1);
2728                 } else {
2729                         be_rx_compl_discard(rxo, rxcp);
2730                         be_cq_notify(adapter, rx_cq->id, false, 1);
2731                         if (rxcp->num_rcvd == 0)
2732                                 break;
2733                 }
2734         }
2735
2736         /* After cleanup, leave the CQ in unarmed state */
2737         be_cq_notify(adapter, rx_cq->id, false, 0);
2738 }
2739
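/* Drain the TX path during teardown. TX completions are polled and processed
 * until all TXQs are idle, the HW has produced nothing new for ~10ms, or a HW
 * error is detected. Any WRBs that were enqueued but never notified to the HW
 * are then reclaimed with the same completion logic, and the TXQ indices are
 * reset to the last notified position.
 */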
2740 static void be_tx_compl_clean(struct be_adapter *adapter)
2741 {
2742         struct device *dev = &adapter->pdev->dev;
2743         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2744         struct be_tx_compl_info *txcp;
2745         struct be_queue_info *txq;
2746         u32 end_idx, notified_idx;
2747         struct be_tx_obj *txo;
2748         int i, pending_txqs;
2749
2750         /* Stop polling for compls when HW has been silent for 10ms */
2751         do {
2752                 pending_txqs = adapter->num_tx_qs;
2753
2754                 for_all_tx_queues(adapter, txo, i) {
2755                         cmpl = 0;
2756                         num_wrbs = 0;
2757                         txq = &txo->q;
2758                         while ((txcp = be_tx_compl_get(txo))) {
2759                                 num_wrbs +=
2760                                         be_tx_compl_process(adapter, txo,
2761                                                             txcp->end_index);
2762                                 cmpl++;
2763                         }
2764                         if (cmpl) {
2765                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2766                                 atomic_sub(num_wrbs, &txq->used);
2767                                 timeo = 0;
2768                         }
2769                         if (!be_is_tx_compl_pending(txo))
2770                                 pending_txqs--;
2771                 }
2772
2773                 if (pending_txqs == 0 || ++timeo > 10 ||
2774                     be_check_error(adapter, BE_ERROR_HW))
2775                         break;
2776
2777                 mdelay(1);
2778         } while (true);
2779
2780         /* Free enqueued TX that was never notified to HW */
2781         for_all_tx_queues(adapter, txo, i) {
2782                 txq = &txo->q;
2783
2784                 if (atomic_read(&txq->used)) {
2785                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2786                                  i, atomic_read(&txq->used));
2787                         notified_idx = txq->tail;
2788                         end_idx = txq->tail;
2789                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2790                                   txq->len);
2791                         /* Use the tx-compl process logic to handle requests
2792                          * that were not sent to the HW.
2793                          */
2794                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2795                         atomic_sub(num_wrbs, &txq->used);
2796                         BUG_ON(atomic_read(&txq->used));
2797                         txo->pend_wrb_cnt = 0;
2798                         /* Since hw was never notified of these requests,
2799                          * reset TXQ indices
2800                          */
2801                         txq->head = notified_idx;
2802                         txq->tail = notified_idx;
2803                 }
2804         }
2805 }
2806
2807 static void be_evt_queues_destroy(struct be_adapter *adapter)
2808 {
2809         struct be_eq_obj *eqo;
2810         int i;
2811
2812         for_all_evt_queues(adapter, eqo, i) {
2813                 if (eqo->q.created) {
2814                         be_eq_clean(eqo);
2815                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2816                         napi_hash_del(&eqo->napi);
2817                         netif_napi_del(&eqo->napi);
2818                         free_cpumask_var(eqo->affinity_mask);
2819                 }
2820                 be_queue_free(adapter, &eqo->q);
2821         }
2822 }
2823
2824 static int be_evt_queues_create(struct be_adapter *adapter)
2825 {
2826         struct be_queue_info *eq;
2827         struct be_eq_obj *eqo;
2828         struct be_aic_obj *aic;
2829         int i, rc;
2830
2831         /* need enough EQs to service both RX and TX queues */
2832         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2833                                     max(adapter->cfg_num_rx_irqs,
2834                                         adapter->cfg_num_tx_irqs));
2835
2836         for_all_evt_queues(adapter, eqo, i) {
2837                 int numa_node = dev_to_node(&adapter->pdev->dev);
2838
2839                 aic = &adapter->aic_obj[i];
2840                 eqo->adapter = adapter;
2841                 eqo->idx = i;
2842                 aic->max_eqd = BE_MAX_EQD;
2843                 aic->enable = true;
2844
2845                 eq = &eqo->q;
2846                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2847                                     sizeof(struct be_eq_entry));
2848                 if (rc)
2849                         return rc;
2850
2851                 rc = be_cmd_eq_create(adapter, eqo);
2852                 if (rc)
2853                         return rc;
2854
2855                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2856                         return -ENOMEM;
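                /* Hint this EQ's IRQ affinity towards a CPU local to the
                 * adapter's NUMA node
                 */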
2857                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2858                                 eqo->affinity_mask);
2859                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2860                                BE_NAPI_WEIGHT);
2861         }
2862         return 0;
2863 }
2864
2865 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2866 {
2867         struct be_queue_info *q;
2868
2869         q = &adapter->mcc_obj.q;
2870         if (q->created)
2871                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2872         be_queue_free(adapter, q);
2873
2874         q = &adapter->mcc_obj.cq;
2875         if (q->created)
2876                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2877         be_queue_free(adapter, q);
2878 }
2879
2880 /* Must be called only after TX qs are created as MCC shares TX EQ */
2881 static int be_mcc_queues_create(struct be_adapter *adapter)
2882 {
2883         struct be_queue_info *q, *cq;
2884
2885         cq = &adapter->mcc_obj.cq;
2886         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2887                            sizeof(struct be_mcc_compl)))
2888                 goto err;
2889
2890         /* Use the default EQ for MCC completions */
2891         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2892                 goto mcc_cq_free;
2893
2894         q = &adapter->mcc_obj.q;
2895         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2896                 goto mcc_cq_destroy;
2897
2898         if (be_cmd_mccq_create(adapter, q, cq))
2899                 goto mcc_q_free;
2900
2901         return 0;
2902
2903 mcc_q_free:
2904         be_queue_free(adapter, q);
2905 mcc_cq_destroy:
2906         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2907 mcc_cq_free:
2908         be_queue_free(adapter, cq);
2909 err:
2910         return -1;
2911 }
2912
2913 static void be_tx_queues_destroy(struct be_adapter *adapter)
2914 {
2915         struct be_queue_info *q;
2916         struct be_tx_obj *txo;
2917         u8 i;
2918
2919         for_all_tx_queues(adapter, txo, i) {
2920                 q = &txo->q;
2921                 if (q->created)
2922                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2923                 be_queue_free(adapter, q);
2924
2925                 q = &txo->cq;
2926                 if (q->created)
2927                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2928                 be_queue_free(adapter, q);
2929         }
2930 }
2931
2932 static int be_tx_qs_create(struct be_adapter *adapter)
2933 {
2934         struct be_queue_info *cq;
2935         struct be_tx_obj *txo;
2936         struct be_eq_obj *eqo;
2937         int status, i;
2938
2939         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2940
2941         for_all_tx_queues(adapter, txo, i) {
2942                 cq = &txo->cq;
2943                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2944                                         sizeof(struct be_eth_tx_compl));
2945                 if (status)
2946                         return status;
2947
2948                 u64_stats_init(&txo->stats.sync);
2949                 u64_stats_init(&txo->stats.sync_compl);
2950
2951                 /* If num_evt_qs is less than num_tx_qs, then more than
2952                  * one TXQ shares an EQ
2953                  */
2954                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2955                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2956                 if (status)
2957                         return status;
2958
2959                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2960                                         sizeof(struct be_eth_wrb));
2961                 if (status)
2962                         return status;
2963
2964                 status = be_cmd_txq_create(adapter, txo);
2965                 if (status)
2966                         return status;
2967
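                /* Use XPS to steer transmits onto the CPUs that service this
                 * queue's EQ
                 */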
2968                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2969                                     eqo->idx);
2970         }
2971
2972         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2973                  adapter->num_tx_qs);
2974         return 0;
2975 }
2976
2977 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2978 {
2979         struct be_queue_info *q;
2980         struct be_rx_obj *rxo;
2981         int i;
2982
2983         for_all_rx_queues(adapter, rxo, i) {
2984                 q = &rxo->cq;
2985                 if (q->created)
2986                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2987                 be_queue_free(adapter, q);
2988         }
2989 }
2990
2991 static int be_rx_cqs_create(struct be_adapter *adapter)
2992 {
2993         struct be_queue_info *eq, *cq;
2994         struct be_rx_obj *rxo;
2995         int rc, i;
2996
2997         adapter->num_rss_qs =
2998                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2999
3000         /* We'll use RSS only if at least 2 RSS rings are supported. */
3001         if (adapter->num_rss_qs < 2)
3002                 adapter->num_rss_qs = 0;
3003
3004         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3005
3006         /* When the interface is not capable of RSS rings (and there is no
3007          * need to create a default RXQ) we'll still need one RXQ
3008          */
3009         if (adapter->num_rx_qs == 0)
3010                 adapter->num_rx_qs = 1;
3011
3012         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3013         for_all_rx_queues(adapter, rxo, i) {
3014                 rxo->adapter = adapter;
3015                 cq = &rxo->cq;
3016                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3017                                     sizeof(struct be_eth_rx_compl));
3018                 if (rc)
3019                         return rc;
3020
3021                 u64_stats_init(&rxo->stats.sync);
3022                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3023                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3024                 if (rc)
3025                         return rc;
3026         }
3027
3028         dev_info(&adapter->pdev->dev,
3029                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3030         return 0;
3031 }
3032
3033 static irqreturn_t be_intx(int irq, void *dev)
3034 {
3035         struct be_eq_obj *eqo = dev;
3036         struct be_adapter *adapter = eqo->adapter;
3037         int num_evts = 0;
3038
3039         /* IRQ is not expected when NAPI is scheduled as the EQ
3040          * will not be armed.
3041          * But, this can happen on Lancer INTx where it takes
3042          * a while to de-assert INTx or in BE2 where occasionally
3043          * an interrupt may be raised even when EQ is unarmed.
3044          * If NAPI is already scheduled, then counting & notifying
3045          * events will orphan them.
3046          */
3047         if (napi_schedule_prep(&eqo->napi)) {
3048                 num_evts = events_get(eqo);
3049                 __napi_schedule(&eqo->napi);
3050                 if (num_evts)
3051                         eqo->spurious_intr = 0;
3052         }
3053         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3054
3055         /* Return IRQ_HANDLED only for the first spurious intr
3056          * after a valid intr to stop the kernel from branding
3057          * this irq as a bad one!
3058          */
3059         if (num_evts || eqo->spurious_intr++ == 0)
3060                 return IRQ_HANDLED;
3061         else
3062                 return IRQ_NONE;
3063 }
3064
3065 static irqreturn_t be_msix(int irq, void *dev)
3066 {
3067         struct be_eq_obj *eqo = dev;
3068
3069         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3070         napi_schedule(&eqo->napi);
3071         return IRQ_HANDLED;
3072 }
3073
3074 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3075 {
3076         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3077 }
3078
3079 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3080                          int budget, int polling)
3081 {
3082         struct be_adapter *adapter = rxo->adapter;
3083         struct be_queue_info *rx_cq = &rxo->cq;
3084         struct be_rx_compl_info *rxcp;
3085         u32 work_done;
3086         u32 frags_consumed = 0;
3087
3088         for (work_done = 0; work_done < budget; work_done++) {
3089                 rxcp = be_rx_compl_get(rxo);
3090                 if (!rxcp)
3091                         break;
3092
3093                 /* Is it a flush compl that has no data? */
3094                 if (unlikely(rxcp->num_rcvd == 0))
3095                         goto loop_continue;
3096
3097                 /* Discard compls with partial DMA on Lancer B0 */
3098                 if (unlikely(!rxcp->pkt_size)) {
3099                         be_rx_compl_discard(rxo, rxcp);
3100                         goto loop_continue;
3101                 }
3102
3103                 /* On BE drop pkts that arrive due to imperfect filtering in
3104                  * promiscuous mode on some SKUs
3105                  */
3106                 if (unlikely(rxcp->port != adapter->port_num &&
3107                              !lancer_chip(adapter))) {
3108                         be_rx_compl_discard(rxo, rxcp);
3109                         goto loop_continue;
3110                 }
3111
3112                 /* Don't do gro when we're busy_polling */
3113                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3114                         be_rx_compl_process_gro(rxo, napi, rxcp);
3115                 else
3116                         be_rx_compl_process(rxo, napi, rxcp);
3117
3118 loop_continue:
3119                 frags_consumed += rxcp->num_rcvd;
3120                 be_rx_stats_update(rxo, rxcp);
3121         }
3122
3123         if (work_done) {
3124                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3125
3126                 /* When an rx-obj gets into post_starved state, just
3127                  * let be_worker do the posting.
3128                  */
3129                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3130                     !rxo->rx_post_starved)
3131                         be_post_rx_frags(rxo, GFP_ATOMIC,
3132                                          max_t(u32, MAX_RX_POST,
3133                                                frags_consumed));
3134         }
3135
3136         return work_done;
3137 }
3138
3139 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141         switch (status) {
3142         case BE_TX_COMP_HDR_PARSE_ERR:
3143                 tx_stats(txo)->tx_hdr_parse_err++;
3144                 break;
3145         case BE_TX_COMP_NDMA_ERR:
3146                 tx_stats(txo)->tx_dma_err++;
3147                 break;
3148         case BE_TX_COMP_ACL_ERR:
3149                 tx_stats(txo)->tx_spoof_check_err++;
3150                 break;
3151         }
3152 }
3153
3154 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3155 {
3156         switch (status) {
3157         case LANCER_TX_COMP_LSO_ERR:
3158                 tx_stats(txo)->tx_tso_err++;
3159                 break;
3160         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3161         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3162                 tx_stats(txo)->tx_spoof_check_err++;
3163                 break;
3164         case LANCER_TX_COMP_QINQ_ERR:
3165                 tx_stats(txo)->tx_qinq_err++;
3166                 break;
3167         case LANCER_TX_COMP_PARITY_ERR:
3168                 tx_stats(txo)->tx_internal_parity_err++;
3169                 break;
3170         case LANCER_TX_COMP_DMA_ERR:
3171                 tx_stats(txo)->tx_dma_err++;
3172                 break;
3173         }
3174 }
3175
3176 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3177                           int idx)
3178 {
3179         int num_wrbs = 0, work_done = 0;
3180         struct be_tx_compl_info *txcp;
3181
3182         while ((txcp = be_tx_compl_get(txo))) {
3183                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3184                 work_done++;
3185
3186                 if (txcp->status) {
3187                         if (lancer_chip(adapter))
3188                                 lancer_update_tx_err(txo, txcp->status);
3189                         else
3190                                 be_update_tx_err(txo, txcp->status);
3191                 }
3192         }
3193
3194         if (work_done) {
3195                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3196                 atomic_sub(num_wrbs, &txo->q.used);
3197
3198                 /* As Tx wrbs have been freed up, wake up netdev queue
3199                  * if it was stopped due to lack of tx wrbs.  */
3200                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3201                     be_can_txq_wake(txo)) {
3202                         netif_wake_subqueue(adapter->netdev, idx);
3203                 }
3204
3205                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3206                 tx_stats(txo)->tx_compl += work_done;
3207                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3208         }
3209 }
3210
3211 #ifdef CONFIG_NET_RX_BUSY_POLL
3212 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3213 {
3214         bool status = true;
3215
3216         spin_lock(&eqo->lock); /* BH is already disabled */
3217         if (eqo->state & BE_EQ_LOCKED) {
3218                 WARN_ON(eqo->state & BE_EQ_NAPI);
3219                 eqo->state |= BE_EQ_NAPI_YIELD;
3220                 status = false;
3221         } else {
3222                 eqo->state = BE_EQ_NAPI;
3223         }
3224         spin_unlock(&eqo->lock);
3225         return status;
3226 }
3227
3228 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3229 {
3230         spin_lock(&eqo->lock); /* BH is already disabled */
3231
3232         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3233         eqo->state = BE_EQ_IDLE;
3234
3235         spin_unlock(&eqo->lock);
3236 }
3237
3238 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3239 {
3240         bool status = true;
3241
3242         spin_lock_bh(&eqo->lock);
3243         if (eqo->state & BE_EQ_LOCKED) {
3244                 eqo->state |= BE_EQ_POLL_YIELD;
3245                 status = false;
3246         } else {
3247                 eqo->state |= BE_EQ_POLL;
3248         }
3249         spin_unlock_bh(&eqo->lock);
3250         return status;
3251 }
3252
3253 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3254 {
3255         spin_lock_bh(&eqo->lock);
3256
3257         WARN_ON(eqo->state & (BE_EQ_NAPI));
3258         eqo->state = BE_EQ_IDLE;
3259
3260         spin_unlock_bh(&eqo->lock);
3261 }
3262
3263 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3264 {
3265         spin_lock_init(&eqo->lock);
3266         eqo->state = BE_EQ_IDLE;
3267 }
3268
3269 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3270 {
3271         local_bh_disable();
3272
3273         /* It's enough to just acquire napi lock on the eqo to stop
3274          * be_busy_poll() from processing any queues.
3275          */
3276         while (!be_lock_napi(eqo))
3277                 mdelay(1);
3278
3279         local_bh_enable();
3280 }
3281
3282 #else /* CONFIG_NET_RX_BUSY_POLL */
3283
3284 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3285 {
3286         return true;
3287 }
3288
3289 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3290 {
3291 }
3292
3293 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3294 {
3295         return false;
3296 }
3297
3298 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3299 {
3300 }
3301
3302 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3303 {
3304 }
3305
3306 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3307 {
3308 }
3309 #endif /* CONFIG_NET_RX_BUSY_POLL */
3310
3311 int be_poll(struct napi_struct *napi, int budget)
3312 {
3313         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3314         struct be_adapter *adapter = eqo->adapter;
3315         int max_work = 0, work, i, num_evts;
3316         struct be_rx_obj *rxo;
3317         struct be_tx_obj *txo;
3318         u32 mult_enc = 0;
3319
3320         num_evts = events_get(eqo);
3321
3322         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3323                 be_process_tx(adapter, txo, i);
3324
3325         if (be_lock_napi(eqo)) {
3326                 /* This loop will iterate twice for EQ0 in which
3327                  * completions of the last RXQ (default one) are also processed.
3328                  * For other EQs the loop iterates only once
3329                  */
3330                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3331                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3332                         max_work = max(work, max_work);
3333                 }
3334                 be_unlock_napi(eqo);
3335         } else {
3336                 max_work = budget;
3337         }
3338
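        /* Service MCC completions on the EQ that handles the MCC CQ */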
3339         if (is_mcc_eqo(eqo))
3340                 be_process_mcc(adapter);
3341
3342         if (max_work < budget) {
3343                 napi_complete(napi);
3344
3345                 /* Skyhawk EQ_DB has a provision to set the rearm-to-interrupt
3346                  * delay via a delay multiplier encoding value
3347                  */
3348                 if (skyhawk_chip(adapter))
3349                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3350
3351                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3352                              mult_enc);
3353         } else {
3354                 /* As we'll continue in polling mode, count and clear events */
3355                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3356         }
3357         return max_work;
3358 }
3359
3360 #ifdef CONFIG_NET_RX_BUSY_POLL
3361 static int be_busy_poll(struct napi_struct *napi)
3362 {
3363         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3364         struct be_adapter *adapter = eqo->adapter;
3365         struct be_rx_obj *rxo;
3366         int i, work = 0;
3367
3368         if (!be_lock_busy_poll(eqo))
3369                 return LL_FLUSH_BUSY;
3370
3371         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3372                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3373                 if (work)
3374                         break;
3375         }
3376
3377         be_unlock_busy_poll(eqo);
3378         return work;
3379 }
3380 #endif
3381
3382 void be_detect_error(struct be_adapter *adapter)
3383 {
3384         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3385         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3386         u32 i;
3387         struct device *dev = &adapter->pdev->dev;
3388
3389         if (be_check_error(adapter, BE_ERROR_HW))
3390                 return;
3391
3392         if (lancer_chip(adapter)) {
3393                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3394                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3395                         be_set_error(adapter, BE_ERROR_UE);
3396                         sliport_err1 = ioread32(adapter->db +
3397                                                 SLIPORT_ERROR1_OFFSET);
3398                         sliport_err2 = ioread32(adapter->db +
3399                                                 SLIPORT_ERROR2_OFFSET);
3400                         /* Do not log error messages if it's a FW reset */
3401                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3402                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3403                                 dev_info(dev, "Firmware update in progress\n");
3404                         } else {
3405                                 dev_err(dev, "Error detected in the card\n");
3406                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3407                                         sliport_status);
3408                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3409                                         sliport_err1);
3410                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3411                                         sliport_err2);
3412                         }
3413                 }
3414         } else {
3415                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3416                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3417                 ue_lo_mask = ioread32(adapter->pcicfg +
3418                                       PCICFG_UE_STATUS_LOW_MASK);
3419                 ue_hi_mask = ioread32(adapter->pcicfg +
3420                                       PCICFG_UE_STATUS_HI_MASK);
3421
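                /* Consider only the UE bits that are not masked off */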
3422                 ue_lo = (ue_lo & ~ue_lo_mask);
3423                 ue_hi = (ue_hi & ~ue_hi_mask);
3424
3425                 /* On certain platforms BE hardware can indicate spurious UEs.
3426                  * Allow HW to stop working completely in case of a real UE.
3427                  * Hence we do not set the hw_error flag on UE detection.
3428                  */
3429
3430                 if (ue_lo || ue_hi) {
3431                         dev_err(dev, "Error detected in the adapter\n");
3432                         if (skyhawk_chip(adapter))
3433                                 be_set_error(adapter, BE_ERROR_UE);
3434
3435                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3436                                 if (ue_lo & 1)
3437                                         dev_err(dev, "UE: %s bit set\n",
3438                                                 ue_status_low_desc[i]);
3439                         }
3440                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3441                                 if (ue_hi & 1)
3442                                         dev_err(dev, "UE: %s bit set\n",
3443                                                 ue_status_hi_desc[i]);
3444                         }
3445                 }
3446         }
3447 }
3448
3449 static void be_msix_disable(struct be_adapter *adapter)
3450 {
3451         if (msix_enabled(adapter)) {
3452                 pci_disable_msix(adapter->pdev);
3453                 adapter->num_msix_vec = 0;
3454                 adapter->num_msix_roce_vec = 0;
3455         }
3456 }
3457
3458 static int be_msix_enable(struct be_adapter *adapter)
3459 {
3460         unsigned int i, max_roce_eqs;
3461         struct device *dev = &adapter->pdev->dev;
3462         int num_vec;
3463
3464         /* If RoCE is supported, program the max number of vectors that
3465          * could be used for NIC and RoCE, else, just program the number
3466          * we'll use initially.
3467          */
3468         if (be_roce_supported(adapter)) {
3469                 max_roce_eqs =
3470                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3471                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3472                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3473         } else {
3474                 num_vec = max(adapter->cfg_num_rx_irqs,
3475                               adapter->cfg_num_tx_irqs);
3476         }
3477
3478         for (i = 0; i < num_vec; i++)
3479                 adapter->msix_entries[i].entry = i;
3480
3481         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3482                                         MIN_MSIX_VECTORS, num_vec);
3483         if (num_vec < 0)
3484                 goto fail;
3485
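        /* If RoCE is supported and enough vectors were granted, reserve half
         * of them for RoCE
         */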
3486         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3487                 adapter->num_msix_roce_vec = num_vec / 2;
3488                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3489                          adapter->num_msix_roce_vec);
3490         }
3491
3492         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3493
3494         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3495                  adapter->num_msix_vec);
3496         return 0;
3497
3498 fail:
3499         dev_warn(dev, "MSIx enable failed\n");
3500
3501         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3502         if (be_virtfn(adapter))
3503                 return num_vec;
3504         return 0;
3505 }
3506
3507 static inline int be_msix_vec_get(struct be_adapter *adapter,
3508                                   struct be_eq_obj *eqo)
3509 {
3510         return adapter->msix_entries[eqo->msix_idx].vector;
3511 }
3512
3513 static int be_msix_register(struct be_adapter *adapter)
3514 {
3515         struct net_device *netdev = adapter->netdev;
3516         struct be_eq_obj *eqo;
3517         int status, i, vec;
3518
3519         for_all_evt_queues(adapter, eqo, i) {
3520                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3521                 vec = be_msix_vec_get(adapter, eqo);
3522                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3523                 if (status)
3524                         goto err_msix;
3525
3526                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3527         }
3528
3529         return 0;
3530 err_msix:
3531         for (i--; i >= 0; i--) {
3532                 eqo = &adapter->eq_obj[i];
3533                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3534         }
3535         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3536                  status);
3537         be_msix_disable(adapter);
3538         return status;
3539 }
3540
3541 static int be_irq_register(struct be_adapter *adapter)
3542 {
3543         struct net_device *netdev = adapter->netdev;
3544         int status;
3545
3546         if (msix_enabled(adapter)) {
3547                 status = be_msix_register(adapter);
3548                 if (status == 0)
3549                         goto done;
3550                 /* INTx is not supported for VF */
3551                 if (be_virtfn(adapter))
3552                         return status;
3553         }
3554
3555         /* INTx: only the first EQ is used */
3556         netdev->irq = adapter->pdev->irq;
3557         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3558                              &adapter->eq_obj[0]);
3559         if (status) {
3560                 dev_err(&adapter->pdev->dev,
3561                         "INTx request IRQ failed - err %d\n", status);
3562                 return status;
3563         }
3564 done:
3565         adapter->isr_registered = true;
3566         return 0;
3567 }
3568
3569 static void be_irq_unregister(struct be_adapter *adapter)
3570 {
3571         struct net_device *netdev = adapter->netdev;
3572         struct be_eq_obj *eqo;
3573         int i, vec;
3574
3575         if (!adapter->isr_registered)
3576                 return;
3577
3578         /* INTx */
3579         if (!msix_enabled(adapter)) {
3580                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3581                 goto done;
3582         }
3583
3584         /* MSIx */
3585         for_all_evt_queues(adapter, eqo, i) {
3586                 vec = be_msix_vec_get(adapter, eqo);
3587                 irq_set_affinity_hint(vec, NULL);
3588                 free_irq(vec, eqo);
3589         }
3590
3591 done:
3592         adapter->isr_registered = false;
3593 }
3594
3595 static void be_rx_qs_destroy(struct be_adapter *adapter)
3596 {
3597         struct rss_info *rss = &adapter->rss_info;
3598         struct be_queue_info *q;
3599         struct be_rx_obj *rxo;
3600         int i;
3601
3602         for_all_rx_queues(adapter, rxo, i) {
3603                 q = &rxo->q;
3604                 if (q->created) {
3605                         /* If RXQs are destroyed while in an "out of buffer"
3606                          * state, there is a possibility of an HW stall on
3607                          * Lancer. So, post 64 buffers to each queue to relieve
3608                          * the "out of buffer" condition.
3609                          * Make sure there's space in the RXQ before posting.
3610                          */
3611                         if (lancer_chip(adapter)) {
3612                                 be_rx_cq_clean(rxo);
3613                                 if (atomic_read(&q->used) == 0)
3614                                         be_post_rx_frags(rxo, GFP_KERNEL,
3615                                                          MAX_RX_POST);
3616                         }
3617
3618                         be_cmd_rxq_destroy(adapter, q);
3619                         be_rx_cq_clean(rxo);
3620                         be_rxq_clean(rxo);
3621                 }
3622                 be_queue_free(adapter, q);
3623         }
3624
3625         if (rss->rss_flags) {
3626                 rss->rss_flags = RSS_ENABLE_NONE;
3627                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3628                                   128, rss->rss_hkey);
3629         }
3630 }
3631
3632 static void be_disable_if_filters(struct be_adapter *adapter)
3633 {
3634         be_dev_mac_del(adapter, adapter->pmac_id[0]);
3635         be_clear_uc_list(adapter);
3636         be_clear_mc_list(adapter);
3637
3638         /* The IFACE flags are enabled in the open path and cleared
3639          * in the close path. When a VF gets detached from the host and
3640          * assigned to a VM the following happens:
3641          *      - VF's IFACE flags get cleared in the detach path
3642          *      - IFACE create is issued by the VF in the attach path
3643          * Due to a bug in the BE3/Skyhawk-R FW
3644          * (Lancer FW doesn't have the bug), the IFACE capability flags
3645          * specified along with the IFACE create cmd issued by a VF are not
3646          * honoured by FW.  As a consequence, if a *new* driver
3647          * (that enables/disables IFACE flags in open/close)
3648          * is loaded in the host and an *old* driver is * used by a VM/VF,
3649          * is loaded in the host and an *old* driver is used by a VM/VF,
3650          * To avoid this, disable RX-filter flags only for Lancer.
3651          */
3652         if (lancer_chip(adapter)) {
3653                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3654                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3655         }
3656 }
3657
3658 static int be_close(struct net_device *netdev)
3659 {
3660         struct be_adapter *adapter = netdev_priv(netdev);
3661         struct be_eq_obj *eqo;
3662         int i;
3663
3664         /* This protection is needed as be_close() may be called even when the
3665          * adapter is in a cleared state (after an EEH permanent failure)
3666          */
3667         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3668                 return 0;
3669
3670         /* Before attempting cleanup ensure all the pending cmds queued on
3671          * be_wq have finished execution
3672          */
3673         flush_workqueue(be_wq);
3674
3675         be_disable_if_filters(adapter);
3676
3677         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3678                 for_all_evt_queues(adapter, eqo, i) {
3679                         napi_disable(&eqo->napi);
3680                         be_disable_busy_poll(eqo);
3681                 }
3682                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3683         }
3684
3685         be_async_mcc_disable(adapter);
3686
3687         /* Wait for all pending tx completions to arrive so that
3688          * all tx skbs are freed.
3689          */
3690         netif_tx_disable(netdev);
3691         be_tx_compl_clean(adapter);
3692
3693         be_rx_qs_destroy(adapter);
3694
3695         for_all_evt_queues(adapter, eqo, i) {
3696                 if (msix_enabled(adapter))
3697                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3698                 else
3699                         synchronize_irq(netdev->irq);
3700                 be_eq_clean(eqo);
3701         }
3702
3703         be_irq_unregister(adapter);
3704
3705         return 0;
3706 }
3707
3708 static int be_rx_qs_create(struct be_adapter *adapter)
3709 {
3710         struct rss_info *rss = &adapter->rss_info;
3711         u8 rss_key[RSS_HASH_KEY_LEN];
3712         struct be_rx_obj *rxo;
3713         int rc, i, j;
3714
3715         for_all_rx_queues(adapter, rxo, i) {
3716                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3717                                     sizeof(struct be_eth_rx_d));
3718                 if (rc)
3719                         return rc;
3720         }
3721
3722         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3723                 rxo = default_rxo(adapter);
3724                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3725                                        rx_frag_size, adapter->if_handle,
3726                                        false, &rxo->rss_id);
3727                 if (rc)
3728                         return rc;
3729         }
3730
3731         for_all_rss_queues(adapter, rxo, i) {
3732                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3733                                        rx_frag_size, adapter->if_handle,
3734                                        true, &rxo->rss_id);
3735                 if (rc)
3736                         return rc;
3737         }
3738
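        /* With multiple RX rings, stripe the RSS ring-ids across the
         * indirection table and enable RSS hashing
         */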
3739         if (be_multi_rxq(adapter)) {
3740                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3741                         for_all_rss_queues(adapter, rxo, i) {
3742                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3743                                         break;
3744                                 rss->rsstable[j + i] = rxo->rss_id;
3745                                 rss->rss_queue[j + i] = i;
3746                         }
3747                 }
3748                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3749                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3750
3751                 if (!BEx_chip(adapter))
3752                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3753                                 RSS_ENABLE_UDP_IPV6;
3754
3755                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3756                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3757                                        RSS_INDIR_TABLE_LEN, rss_key);
3758                 if (rc) {
3759                         rss->rss_flags = RSS_ENABLE_NONE;
3760                         return rc;
3761                 }
3762
3763                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3764         } else {
3765                 /* Disable RSS if only the default RXQ is created */
3766                 rss->rss_flags = RSS_ENABLE_NONE;
3767         }
3768
3770         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3771          * which is a queue empty condition
3772          */
3773         for_all_rx_queues(adapter, rxo, i)
3774                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3775
3776         return 0;
3777 }
3778
3779 static int be_enable_if_filters(struct be_adapter *adapter)
3780 {
3781         int status;
3782
3783         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3784         if (status)
3785                 return status;
3786
3787         /* For BE3 VFs, the PF programs the initial MAC address */
3788         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3789                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3790                 if (status)
3791                         return status;
3792                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3793         }
3794
3795         if (adapter->vlans_added)
3796                 be_vid_config(adapter);
3797
3798         __be_set_rx_mode(adapter);
3799
3800         return 0;
3801 }
3802
3803 static int be_open(struct net_device *netdev)
3804 {
3805         struct be_adapter *adapter = netdev_priv(netdev);
3806         struct be_eq_obj *eqo;
3807         struct be_rx_obj *rxo;
3808         struct be_tx_obj *txo;
3809         u8 link_status;
3810         int status, i;
3811
3812         status = be_rx_qs_create(adapter);
3813         if (status)
3814                 goto err;
3815
3816         status = be_enable_if_filters(adapter);
3817         if (status)
3818                 goto err;
3819
3820         status = be_irq_register(adapter);
3821         if (status)
3822                 goto err;
3823
3824         for_all_rx_queues(adapter, rxo, i)
3825                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3826
3827         for_all_tx_queues(adapter, txo, i)
3828                 be_cq_notify(adapter, txo->cq.id, true, 0);
3829
3830         be_async_mcc_enable(adapter);
3831
3832         for_all_evt_queues(adapter, eqo, i) {
3833                 napi_enable(&eqo->napi);
3834                 be_enable_busy_poll(eqo);
3835                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3836         }
3837         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3838
3839         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3840         if (!status)
3841                 be_link_status_update(adapter, link_status);
3842
3843         netif_tx_start_all_queues(netdev);
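        /* On Skyhawk, ask the stack to replay known UDP tunnel ports */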
3844         if (skyhawk_chip(adapter))
3845                 udp_tunnel_get_rx_info(netdev);
3846
3847         return 0;
3848 err:
3849         be_close(adapter->netdev);
3850         return -EIO;
3851 }
3852
3853 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3854 {
3855         u32 addr;
3856
3857         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3858
3859         mac[5] = (u8)(addr & 0xFF);
3860         mac[4] = (u8)((addr >> 8) & 0xFF);
3861         mac[3] = (u8)((addr >> 16) & 0xFF);
3862         /* Use the OUI from the current MAC address */
3863         memcpy(mac, adapter->netdev->dev_addr, 3);
3864 }
3865
3866 /*
3867  * Generate a seed MAC address from the PF MAC Address using jhash.
3868  * MAC addresses for VFs are assigned incrementally starting from the seed.
3869  * These addresses are programmed in the ASIC by the PF and the VF driver
3870  * queries for the MAC address during its probe.
3871  */
3872 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3873 {
3874         u32 vf;
3875         int status = 0;
3876         u8 mac[ETH_ALEN];
3877         struct be_vf_cfg *vf_cfg;
3878
3879         be_vf_eth_addr_generate(adapter, mac);
3880
3881         for_all_vfs(adapter, vf_cfg, vf) {
3882                 if (BEx_chip(adapter))
3883                         status = be_cmd_pmac_add(adapter, mac,
3884                                                  vf_cfg->if_handle,
3885                                                  &vf_cfg->pmac_id, vf + 1);
3886                 else
3887                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3888                                                 vf + 1);
3889
3890                 if (status)
3891                         dev_err(&adapter->pdev->dev,
3892                                 "Mac address assignment failed for VF %d\n",
3893                                 vf);
3894                 else
3895                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3896
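                /* The next VF gets the next consecutive MAC address */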
3897                 mac[5] += 1;
3898         }
3899         return status;
3900 }
3901
3902 static int be_vfs_mac_query(struct be_adapter *adapter)
3903 {
3904         int status, vf;
3905         u8 mac[ETH_ALEN];
3906         struct be_vf_cfg *vf_cfg;
3907
3908         for_all_vfs(adapter, vf_cfg, vf) {
3909                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3910                                                mac, vf_cfg->if_handle,
3911                                                false, vf+1);
3912                 if (status)
3913                         return status;
3914                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3915         }
3916         return 0;
3917 }
3918
3919 static void be_vf_clear(struct be_adapter *adapter)
3920 {
3921         struct be_vf_cfg *vf_cfg;
3922         u32 vf;
3923
3924         if (pci_vfs_assigned(adapter->pdev)) {
3925                 dev_warn(&adapter->pdev->dev,
3926                          "VFs are assigned to VMs: not disabling VFs\n");
3927                 goto done;
3928         }
3929
3930         pci_disable_sriov(adapter->pdev);
3931
3932         for_all_vfs(adapter, vf_cfg, vf) {
3933                 if (BEx_chip(adapter))
3934                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3935                                         vf_cfg->pmac_id, vf + 1);
3936                 else
3937                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3938                                        vf + 1);
3939
3940                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3941         }
3942
3943         if (BE3_chip(adapter))
3944                 be_cmd_set_hsw_config(adapter, 0, 0,
3945                                       adapter->if_handle,
3946                                       PORT_FWD_TYPE_PASSTHRU, 0);
3947 done:
3948         kfree(adapter->vf_cfg);
3949         adapter->num_vfs = 0;
3950         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3951 }
3952
3953 static void be_clear_queues(struct be_adapter *adapter)
3954 {
3955         be_mcc_queues_destroy(adapter);
3956         be_rx_cqs_destroy(adapter);
3957         be_tx_queues_destroy(adapter);
3958         be_evt_queues_destroy(adapter);
3959 }
3960
3961 static void be_cancel_worker(struct be_adapter *adapter)
3962 {
3963         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3964                 cancel_delayed_work_sync(&adapter->work);
3965                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3966         }
3967 }
3968
3969 static void be_cancel_err_detection(struct be_adapter *adapter)
3970 {
3971         struct be_error_recovery *err_rec = &adapter->error_recovery;
3972
3973         if (!be_err_recovery_workq)
3974                 return;
3975
3976         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3977                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3978                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3979         }
3980 }
3981
3982 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3983 {
3984         struct net_device *netdev = adapter->netdev;
3985
3986         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3987                 be_cmd_manage_iface(adapter, adapter->if_handle,
3988                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3989
3990         if (adapter->vxlan_port)
3991                 be_cmd_set_vxlan_port(adapter, 0);
3992
3993         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3994         adapter->vxlan_port = 0;
3995
3996         netdev->hw_enc_features = 0;
3997         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3998         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3999 }
4000
4001 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4002                                 struct be_resources *vft_res)
4003 {
4004         struct be_resources res = adapter->pool_res;
4005         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4006         struct be_resources res_mod = {0};
4007         u16 num_vf_qs = 1;
4008
4009         /* Distribute the queue resources among the PF and its VFs */
4010         if (num_vfs) {
4011                 /* Divide the rx queues evenly among the VFs and the PF, capped
4012                  * at VF-EQ-count. Any remainder queues belong to the PF.
4013                  */
4014                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4015                                 res.max_rss_qs / (num_vfs + 1));
4016
4017                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4018                  * RSS Tables per port. Provide RSS on VFs, only if number of
4019                  * VFs requested is less than its PF Pool's RSS Tables limit.
4020                  */
4021                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4022                         num_vf_qs = 1;
4023         }
4024
4025         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4026          * which are modifiable using SET_PROFILE_CONFIG cmd.
4027          */
4028         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4029                                   RESOURCE_MODIFIABLE, 0);
4030
4031         /* If RSS IFACE capability flags are modifiable for a VF, set the
4032          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4033          * more than 1 RSSQ is available for a VF.
4034          * Otherwise, provision only 1 queue pair for VF.
4035          */
4036         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4037                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4038                 if (num_vf_qs > 1) {
4039                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4040                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4041                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4042                 } else {
4043                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4044                                              BE_IF_FLAGS_DEFQ_RSS);
4045                 }
4046         } else {
4047                 num_vf_qs = 1;
4048         }
4049
4050         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4051                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4052                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4053         }
4054
4055         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4056         vft_res->max_rx_qs = num_vf_qs;
4057         vft_res->max_rss_qs = num_vf_qs;
4058         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4059         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4060
4061         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4062          * among the PF and its VFs, if the fields are changeable
4063          */
4064         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4065                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4066
4067         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4068                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4069
4070         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4071                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4072
4073         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4074                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4075 }
4076
4077 static void be_if_destroy(struct be_adapter *adapter)
4078 {
4079         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4080
4081         kfree(adapter->pmac_id);
4082         adapter->pmac_id = NULL;
4083
4084         kfree(adapter->mc_list);
4085         adapter->mc_list = NULL;
4086
4087         kfree(adapter->uc_list);
4088         adapter->uc_list = NULL;
4089 }
4090
4091 static int be_clear(struct be_adapter *adapter)
4092 {
4093         struct pci_dev *pdev = adapter->pdev;
4094         struct  be_resources vft_res = {0};
4095
4096         be_cancel_worker(adapter);
4097
4098         flush_workqueue(be_wq);
4099
4100         if (sriov_enabled(adapter))
4101                 be_vf_clear(adapter);
4102
4103         /* Re-configure FW to distribute resources evenly across max-supported
4104          * number of VFs, only when VFs are not already enabled.
4105          */
4106         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4107             !pci_vfs_assigned(pdev)) {
4108                 be_calculate_vf_res(adapter,
4109                                     pci_sriov_get_totalvfs(pdev),
4110                                     &vft_res);
4111                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4112                                         pci_sriov_get_totalvfs(pdev),
4113                                         &vft_res);
4114         }
4115
4116         be_disable_vxlan_offloads(adapter);
4117
4118         be_if_destroy(adapter);
4119
4120         be_clear_queues(adapter);
4121
4122         be_msix_disable(adapter);
4123         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4124         return 0;
4125 }
4126
4127 static int be_vfs_if_create(struct be_adapter *adapter)
4128 {
4129         struct be_resources res = {0};
4130         u32 cap_flags, en_flags, vf;
4131         struct be_vf_cfg *vf_cfg;
4132         int status;
4133
4134         /* If a FW profile exists, then cap_flags are updated */
4135         cap_flags = BE_VF_IF_EN_FLAGS;
4136
4137         for_all_vfs(adapter, vf_cfg, vf) {
4138                 if (!BE3_chip(adapter)) {
4139                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4140                                                            ACTIVE_PROFILE_TYPE,
4141                                                            RESOURCE_LIMITS,
4142                                                            vf + 1);
4143                         if (!status) {
4144                                 cap_flags = res.if_cap_flags;
4145                                 /* Prevent VFs from enabling VLAN promiscuous
4146                                  * mode
4147                                  */
4148                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4149                         }
4150                 }
4151
4152                 /* PF should enable IF flags during proxy if_create call */
4153                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4154                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4155                                           &vf_cfg->if_handle, vf + 1);
4156                 if (status)
4157                         return status;
4158         }
4159
4160         return 0;
4161 }
4162
4163 static int be_vf_setup_init(struct be_adapter *adapter)
4164 {
4165         struct be_vf_cfg *vf_cfg;
4166         int vf;
4167
4168         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4169                                   GFP_KERNEL);
4170         if (!adapter->vf_cfg)
4171                 return -ENOMEM;
4172
4173         for_all_vfs(adapter, vf_cfg, vf) {
4174                 vf_cfg->if_handle = -1;
4175                 vf_cfg->pmac_id = -1;
4176         }
4177         return 0;
4178 }
4179
4180 static int be_vf_setup(struct be_adapter *adapter)
4181 {
4182         struct device *dev = &adapter->pdev->dev;
4183         struct be_vf_cfg *vf_cfg;
4184         int status, old_vfs, vf;
4185         bool spoofchk;
4186
4187         old_vfs = pci_num_vf(adapter->pdev);
4188
4189         status = be_vf_setup_init(adapter);
4190         if (status)
4191                 goto err;
4192
4193         if (old_vfs) {
4194                 for_all_vfs(adapter, vf_cfg, vf) {
4195                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4196                         if (status)
4197                                 goto err;
4198                 }
4199
4200                 status = be_vfs_mac_query(adapter);
4201                 if (status)
4202                         goto err;
4203         } else {
4204                 status = be_vfs_if_create(adapter);
4205                 if (status)
4206                         goto err;
4207
4208                 status = be_vf_eth_addr_config(adapter);
4209                 if (status)
4210                         goto err;
4211         }
4212
4213         for_all_vfs(adapter, vf_cfg, vf) {
4214                 /* Allow VFs to program MAC/VLAN filters */
4215                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4216                                                   vf + 1);
4217                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4218                         status = be_cmd_set_fn_privileges(adapter,
4219                                                           vf_cfg->privileges |
4220                                                           BE_PRIV_FILTMGMT,
4221                                                           vf + 1);
4222                         if (!status) {
4223                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4224                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4225                                          vf);
4226                         }
4227                 }
4228
4229                 /* Allow full available bandwidth */
4230                 if (!old_vfs)
4231                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4232
4233                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4234                                                vf_cfg->if_handle, NULL,
4235                                                &spoofchk);
4236                 if (!status)
4237                         vf_cfg->spoofchk = spoofchk;
4238
4239                 if (!old_vfs) {
4240                         be_cmd_enable_vf(adapter, vf + 1);
4241                         be_cmd_set_logical_link_config(adapter,
4242                                                        IFLA_VF_LINK_STATE_AUTO,
4243                                                        vf+1);
4244                 }
4245         }
4246
4247         if (!old_vfs) {
4248                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4249                 if (status) {
4250                         dev_err(dev, "SRIOV enable failed\n");
4251                         adapter->num_vfs = 0;
4252                         goto err;
4253                 }
4254         }
4255
4256         if (BE3_chip(adapter)) {
4257                 /* On BE3, enable VEB only when SRIOV is enabled */
4258                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4259                                                adapter->if_handle,
4260                                                PORT_FWD_TYPE_VEB, 0);
4261                 if (status)
4262                         goto err;
4263         }
4264
4265         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4266         return 0;
4267 err:
4268         dev_err(dev, "VF setup failed\n");
4269         be_vf_clear(adapter);
4270         return status;
4271 }
4272
4273 /* Converting function_mode bits on BE3 to SH mc_type enums */
4274
4275 static u8 be_convert_mc_type(u32 function_mode)
4276 {
4277         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4278                 return vNIC1;
4279         else if (function_mode & QNQ_MODE)
4280                 return FLEX10;
4281         else if (function_mode & VNIC_MODE)
4282                 return vNIC2;
4283         else if (function_mode & UMC_ENABLED)
4284                 return UMC;
4285         else
4286                 return MC_NONE;
4287 }
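/* Mapping used above (first match wins): VNIC_MODE + QNQ_MODE -> vNIC1,
 * QNQ_MODE alone -> FLEX10, VNIC_MODE alone -> vNIC2, UMC_ENABLED -> UMC,
 * anything else -> MC_NONE.
 */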
4288
4289 /* On BE2/BE3, FW does not provide the supported limits */
4290 static void BEx_get_resources(struct be_adapter *adapter,
4291                               struct be_resources *res)
4292 {
4293         bool use_sriov = adapter->num_vfs ? 1 : 0;
4294
4295         if (be_physfn(adapter))
4296                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4297         else
4298                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4299
4300         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4301
4302         if (be_is_mc(adapter)) {
4303                 /* Assuming that there are 4 channels per port,
4304                 /* Assuming that there are 4 channels per port
4305                  * when multi-channel is enabled
4306                 if (be_is_qnq_mode(adapter))
4307                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4308                 else
4309                         /* In a non-qnq multichannel mode, the pvid
4310                          * takes up one vlan entry
4311                          */
4312                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4313         } else {
4314                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4315         }
4316
4317         res->max_mcast_mac = BE_MAX_MC;
4318
4319         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4320          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4321          *    *only* if it is RSS-capable.
4322          */
4323         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4324             be_virtfn(adapter) ||
4325             (be_is_mc(adapter) &&
4326              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4327                 res->max_tx_qs = 1;
4328         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4329                 struct be_resources super_nic_res = {0};
4330
4331                 /* On a SuperNIC profile, the driver needs to use the
4332                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4333                  */
4334                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4335                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4336                                           0);
4337                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4338                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4339         } else {
4340                 res->max_tx_qs = BE3_MAX_TX_QS;
4341         }
4342
4343         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4344             !use_sriov && be_physfn(adapter))
4345                 res->max_rss_qs = (adapter->be3_native) ?
4346                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4347         res->max_rx_qs = res->max_rss_qs + 1;
4348
4349         if (be_physfn(adapter))
4350                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4351                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4352         else
4353                 res->max_evt_qs = 1;
4354
4355         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4356         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4357         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4358                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4359 }
4360
4361 static void be_setup_init(struct be_adapter *adapter)
4362 {
4363         adapter->vlan_prio_bmap = 0xff;
4364         adapter->phy.link_speed = -1;
4365         adapter->if_handle = -1;
4366         adapter->be3_native = false;
4367         adapter->if_flags = 0;
4368         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4369         if (be_physfn(adapter))
4370                 adapter->cmd_privileges = MAX_PRIVILEGES;
4371         else
4372                 adapter->cmd_privileges = MIN_PRIVILEGES;
4373 }
4374
4375 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4376  * However, this HW limitation is not exposed to the host via any SLI cmd.
4377  * As a result, in the case of SRIOV, and in particular in multi-partition configs,
4378  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4379  * for distribution between the VFs. This self-imposed limit will determine the
4380  * number of VFs for which RSS can be enabled.
4381  */
4382 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4383 {
4384         struct be_port_resources port_res = {0};
4385         u8 rss_tables_on_port;
4386         u16 max_vfs = be_max_vfs(adapter);
4387
4388         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4389                                   RESOURCE_LIMITS, 0);
4390
4391         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4392
4393         /* Each PF Pool's RSS Tables limit =
4394          * (PF's Max VFs / Total_Max_VFs on Port) * RSS Tables on Port
4395          */
4396         adapter->pool_res.max_rss_tables =
4397                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4398 }
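/* Worked illustration of the proportional-share formula above, using purely
 * hypothetical numbers (not read from any adapter): if rss_tables_on_port
 * worked out to 12 (say, MAX_PORT_RSS_TABLES = 15 with port_res.nic_pfs = 3)
 * and this PF's pool allowed max_vfs = 32 out of port_res.max_vfs = 64 VFs on
 * the port, the PF pool would be limited to 32 * 12 / 64 = 6 RSS Tables.
 */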
4399
4400 static int be_get_sriov_config(struct be_adapter *adapter)
4401 {
4402         struct be_resources res = {0};
4403         int max_vfs, old_vfs;
4404
4405         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4406                                   RESOURCE_LIMITS, 0);
4407
4408         /* Some old versions of BE3 FW don't report max_vfs value */
4409         if (BE3_chip(adapter) && !res.max_vfs) {
4410                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4411                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4412         }
4413
4414         adapter->pool_res = res;
4415
4416         /* If during previous unload of the driver, the VFs were not disabled,
4417          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4418          * Instead use the TotalVFs value stored in the pci-dev struct.
4419          */
4420         old_vfs = pci_num_vf(adapter->pdev);
4421         if (old_vfs) {
4422                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4423                          old_vfs);
4424
4425                 adapter->pool_res.max_vfs =
4426                         pci_sriov_get_totalvfs(adapter->pdev);
4427                 adapter->num_vfs = old_vfs;
4428         }
4429
4430         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4431                 be_calculate_pf_pool_rss_tables(adapter);
4432                 dev_info(&adapter->pdev->dev,
4433                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4434                          be_max_pf_pool_rss_tables(adapter));
4435         }
4436         return 0;
4437 }
4438
4439 static void be_alloc_sriov_res(struct be_adapter *adapter)
4440 {
4441         int old_vfs = pci_num_vf(adapter->pdev);
4442         struct be_resources vft_res = {0};
4443         int status;
4444
4445         be_get_sriov_config(adapter);
4446
4447         if (!old_vfs)
4448                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4449
4450         /* When the HW is in an SRIOV capable configuration, the PF-pool
4451          * resources are given to the PF during driver load if there are no
4452          * old VFs. This facility is not available in BE3 FW.
4453          * On Lancer, this is done by the FW itself.
4454          */
4455         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4456                 be_calculate_vf_res(adapter, 0, &vft_res);
4457                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4458                                                  &vft_res);
4459                 if (status)
4460                         dev_err(&adapter->pdev->dev,
4461                                 "Failed to optimize SRIOV resources\n");
4462         }
4463 }
4464
4465 static int be_get_resources(struct be_adapter *adapter)
4466 {
4467         struct device *dev = &adapter->pdev->dev;
4468         struct be_resources res = {0};
4469         int status;
4470
4471         /* For Lancer, SH etc., read per-function resource limits from FW.
4472          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4473          * GET_PROFILE_CONFIG returns PCI-E related (PF-pool) limits.
4474          */
4475         if (BEx_chip(adapter)) {
4476                 BEx_get_resources(adapter, &res);
4477         } else {
4478                 status = be_cmd_get_func_config(adapter, &res);
4479                 if (status)
4480                         return status;
4481
4482                 /* If a default RXQ must be created, we'll use up one RSSQ */
4483                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4484                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4485                         res.max_rss_qs -= 1;
4486         }
4487
4488         /* If RoCE is supported, stash away half the EQs for RoCE */
4489         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4490                                 res.max_evt_qs / 2 : res.max_evt_qs;
4491         adapter->res = res;
4492
4493         /* If FW supports RSS default queue, then skip creating non-RSS
4494          * queue for non-IP traffic.
4495          */
4496         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4497                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4498
4499         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4500                  be_max_txqs(adapter), be_max_rxqs(adapter),
4501                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4502                  be_max_vfs(adapter));
4503         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4504                  be_max_uc(adapter), be_max_mc(adapter),
4505                  be_max_vlans(adapter));
4506
4507         /* Ensure RX and TX queues are created in pairs at init time */
4508         adapter->cfg_num_rx_irqs =
4509                                 min_t(u16, netif_get_num_default_rss_queues(),
4510                                       be_max_qp_irqs(adapter));
4511         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4512         return 0;
4513 }
4514
4515 static int be_get_config(struct be_adapter *adapter)
4516 {
4517         int status, level;
4518         u16 profile_id;
4519
4520         status = be_cmd_get_cntl_attributes(adapter);
4521         if (status)
4522                 return status;
4523
4524         status = be_cmd_query_fw_cfg(adapter);
4525         if (status)
4526                 return status;
4527
4528         if (!lancer_chip(adapter) && be_physfn(adapter))
4529                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4530
4531         if (BEx_chip(adapter)) {
4532                 level = be_cmd_get_fw_log_level(adapter);
4533                 adapter->msg_enable =
4534                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4535         }
4536
4537         be_cmd_get_acpi_wol_cap(adapter);
4538         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4539         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4540
4541         be_cmd_query_port_name(adapter);
4542
4543         if (be_physfn(adapter)) {
4544                 status = be_cmd_get_active_profile(adapter, &profile_id);
4545                 if (!status)
4546                         dev_info(&adapter->pdev->dev,
4547                                  "Using profile 0x%x\n", profile_id);
4548         }
4549
4550         return 0;
4551 }
4552
4553 static int be_mac_setup(struct be_adapter *adapter)
4554 {
4555         u8 mac[ETH_ALEN];
4556         int status;
4557
4558         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4559                 status = be_cmd_get_perm_mac(adapter, mac);
4560                 if (status)
4561                         return status;
4562
4563                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4564                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4565         }
4566
4567         return 0;
4568 }
4569
4570 static void be_schedule_worker(struct be_adapter *adapter)
4571 {
4572         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4573         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4574 }
4575
4576 static void be_destroy_err_recovery_workq(void)
4577 {
4578         if (!be_err_recovery_workq)
4579                 return;
4580
4581         flush_workqueue(be_err_recovery_workq);
4582         destroy_workqueue(be_err_recovery_workq);
4583         be_err_recovery_workq = NULL;
4584 }
4585
4586 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4587 {
4588         struct be_error_recovery *err_rec = &adapter->error_recovery;
4589
4590         if (!be_err_recovery_workq)
4591                 return;
4592
4593         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4594                            msecs_to_jiffies(delay));
4595         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4596 }
4597
4598 static int be_setup_queues(struct be_adapter *adapter)
4599 {
4600         struct net_device *netdev = adapter->netdev;
4601         int status;
4602
4603         status = be_evt_queues_create(adapter);
4604         if (status)
4605                 goto err;
4606
4607         status = be_tx_qs_create(adapter);
4608         if (status)
4609                 goto err;
4610
4611         status = be_rx_cqs_create(adapter);
4612         if (status)
4613                 goto err;
4614
4615         status = be_mcc_queues_create(adapter);
4616         if (status)
4617                 goto err;
4618
4619         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4620         if (status)
4621                 goto err;
4622
4623         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4624         if (status)
4625                 goto err;
4626
4627         return 0;
4628 err:
4629         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4630         return status;
4631 }
4632
4633 static int be_if_create(struct be_adapter *adapter)
4634 {
4635         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4636         u32 cap_flags = be_if_cap_flags(adapter);
4637         int status;
4638
4639         /* alloc required memory for other filtering fields */
4640         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4641                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4642         if (!adapter->pmac_id)
4643                 return -ENOMEM;
4644
4645         adapter->mc_list = kcalloc(be_max_mc(adapter),
4646                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4647         if (!adapter->mc_list)
4648                 return -ENOMEM;
4649
4650         adapter->uc_list = kcalloc(be_max_uc(adapter),
4651                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4652         if (!adapter->uc_list)
4653                 return -ENOMEM;
4654
4655         if (adapter->cfg_num_rx_irqs == 1)
4656                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4657
4658         en_flags &= cap_flags;
4659         /* will enable all the needed filter flags in be_open() */
4660         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4661                                   &adapter->if_handle, 0);
4662
4663         if (status)
4664                 return status;
4665
4666         return 0;
4667 }
4668
4669 int be_update_queues(struct be_adapter *adapter)
4670 {
4671         struct net_device *netdev = adapter->netdev;
4672         int status;
4673
4674         if (netif_running(netdev))
4675                 be_close(netdev);
4676
4677         be_cancel_worker(adapter);
4678
4679         /* If any vectors have been shared with RoCE we cannot re-program
4680          * the MSIx table.
4681          */
4682         if (!adapter->num_msix_roce_vec)
4683                 be_msix_disable(adapter);
4684
4685         be_clear_queues(adapter);
4686         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4687         if (status)
4688                 return status;
4689
4690         if (!msix_enabled(adapter)) {
4691                 status = be_msix_enable(adapter);
4692                 if (status)
4693                         return status;
4694         }
4695
4696         status = be_if_create(adapter);
4697         if (status)
4698                 return status;
4699
4700         status = be_setup_queues(adapter);
4701         if (status)
4702                 return status;
4703
4704         be_schedule_worker(adapter);
4705
4706         if (netif_running(netdev))
4707                 status = be_open(netdev);
4708
4709         return status;
4710 }
4711
4712 static inline int fw_major_num(const char *fw_ver)
4713 {
4714         int fw_major = 0, i;
4715
4716         i = sscanf(fw_ver, "%d.", &fw_major);
4717         if (i != 1)
4718                 return 0;
4719
4720         return fw_major;
4721 }
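/* Illustrative inputs (not taken from any real firmware): fw_major_num("4.9.270.0")
 * returns 4, while a version string that does not start with a decimal number
 * causes sscanf() to match nothing and the function returns 0.
 */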
4722
4723 /* If it is error recovery, FLR the PF
4724  * Else, if any VFs are already enabled, don't FLR the PF
4725  */
4726 static bool be_reset_required(struct be_adapter *adapter)
4727 {
4728         if (be_error_recovering(adapter))
4729                 return true;
4730         else
4731                 return pci_num_vf(adapter->pdev) == 0;
4732 }
4733
4734 /* Wait for the FW to be ready and perform the required initialization */
4735 static int be_func_init(struct be_adapter *adapter)
4736 {
4737         int status;
4738
4739         status = be_fw_wait_ready(adapter);
4740         if (status)
4741                 return status;
4742
4743         /* FW is now ready; clear errors to allow cmds/doorbell */
4744         be_clear_error(adapter, BE_CLEAR_ALL);
4745
4746         if (be_reset_required(adapter)) {
4747                 status = be_cmd_reset_function(adapter);
4748                 if (status)
4749                         return status;
4750
4751                 /* Wait for interrupts to quiesce after an FLR */
4752                 msleep(100);
4753         }
4754
4755         /* Tell FW we're ready to fire cmds */
4756         status = be_cmd_fw_init(adapter);
4757         if (status)
4758                 return status;
4759
4760         /* Allow interrupts for other ULPs running on NIC function */
4761         be_intr_set(adapter, true);
4762
4763         return 0;
4764 }
4765
4766 static int be_setup(struct be_adapter *adapter)
4767 {
4768         struct device *dev = &adapter->pdev->dev;
4769         int status;
4770
4771         status = be_func_init(adapter);
4772         if (status)
4773                 return status;
4774
4775         be_setup_init(adapter);
4776
4777         if (!lancer_chip(adapter))
4778                 be_cmd_req_native_mode(adapter);
4779
4780         /* invoke this cmd first to get pf_num and vf_num which are needed
4781          * for issuing profile related cmds
4782          */
4783         if (!BEx_chip(adapter)) {
4784                 status = be_cmd_get_func_config(adapter, NULL);
4785                 if (status)
4786                         return status;
4787         }
4788
4789         status = be_get_config(adapter);
4790         if (status)
4791                 goto err;
4792
4793         if (!BE2_chip(adapter) && be_physfn(adapter))
4794                 be_alloc_sriov_res(adapter);
4795
4796         status = be_get_resources(adapter);
4797         if (status)
4798                 goto err;
4799
4800         status = be_msix_enable(adapter);
4801         if (status)
4802                 goto err;
4803
4804         /* will enable all the needed filter flags in be_open() */
4805         status = be_if_create(adapter);
4806         if (status)
4807                 goto err;
4808
4809         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4810         rtnl_lock();
4811         status = be_setup_queues(adapter);
4812         rtnl_unlock();
4813         if (status)
4814                 goto err;
4815
4816         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4817
4818         status = be_mac_setup(adapter);
4819         if (status)
4820                 goto err;
4821
4822         be_cmd_get_fw_ver(adapter);
4823         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4824
4825         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4826                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4827                         adapter->fw_ver);
4828                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4829         }
4830
4831         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4832                                          adapter->rx_fc);
4833         if (status)
4834                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4835                                         &adapter->rx_fc);
4836
4837         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4838                  adapter->tx_fc, adapter->rx_fc);
4839
4840         if (be_physfn(adapter))
4841                 be_cmd_set_logical_link_config(adapter,
4842                                                IFLA_VF_LINK_STATE_AUTO, 0);
4843
4844         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4845          * confusing any Linux bridge or OVS that it might be connected to.
4846          * When SRIOV is not enabled, set the EVB to PASSTHRU mode, which
4847          * effectively disables it.
4848          */
4849         if (BE3_chip(adapter))
4850                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4851                                       PORT_FWD_TYPE_PASSTHRU, 0);
4852
4853         if (adapter->num_vfs)
4854                 be_vf_setup(adapter);
4855
4856         status = be_cmd_get_phy_info(adapter);
4857         if (!status && be_pause_supported(adapter))
4858                 adapter->phy.fc_autoneg = 1;
4859
4860         if (be_physfn(adapter) && !lancer_chip(adapter))
4861                 be_cmd_set_features(adapter);
4862
4863         be_schedule_worker(adapter);
4864         adapter->flags |= BE_FLAGS_SETUP_DONE;
4865         return 0;
4866 err:
4867         be_clear(adapter);
4868         return status;
4869 }
4870
4871 #ifdef CONFIG_NET_POLL_CONTROLLER
4872 static void be_netpoll(struct net_device *netdev)
4873 {
4874         struct be_adapter *adapter = netdev_priv(netdev);
4875         struct be_eq_obj *eqo;
4876         int i;
4877
4878         for_all_evt_queues(adapter, eqo, i) {
4879                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4880                 napi_schedule(&eqo->napi);
4881         }
4882 }
4883 #endif
4884
4885 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4886 {
4887         const struct firmware *fw;
4888         int status;
4889
4890         if (!netif_running(adapter->netdev)) {
4891                 dev_err(&adapter->pdev->dev,
4892                         "Firmware load not allowed (interface is down)\n");
4893                 return -ENETDOWN;
4894         }
4895
4896         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4897         if (status)
4898                 goto fw_exit;
4899
4900         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4901
4902         if (lancer_chip(adapter))
4903                 status = lancer_fw_download(adapter, fw);
4904         else
4905                 status = be_fw_download(adapter, fw);
4906
4907         if (!status)
4908                 be_cmd_get_fw_ver(adapter);
4909
4910 fw_exit:
4911         release_firmware(fw);
4912         return status;
4913 }
4914
4915 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4916                                  u16 flags)
4917 {
4918         struct be_adapter *adapter = netdev_priv(dev);
4919         struct nlattr *attr, *br_spec;
4920         int rem;
4921         int status = 0;
4922         u16 mode = 0;
4923
4924         if (!sriov_enabled(adapter))
4925                 return -EOPNOTSUPP;
4926
4927         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4928         if (!br_spec)
4929                 return -EINVAL;
4930
4931         nla_for_each_nested(attr, br_spec, rem) {
4932                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4933                         continue;
4934
4935                 if (nla_len(attr) < sizeof(mode))
4936                         return -EINVAL;
4937
4938                 mode = nla_get_u16(attr);
4939                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4940                         return -EOPNOTSUPP;
4941
4942                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4943                         return -EINVAL;
4944
4945                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4946                                                adapter->if_handle,
4947                                                mode == BRIDGE_MODE_VEPA ?
4948                                                PORT_FWD_TYPE_VEPA :
4949                                                PORT_FWD_TYPE_VEB, 0);
4950                 if (status)
4951                         goto err;
4952
4953                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4954                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4955
4956                 return status;
4957         }
4958 err:
4959         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4960                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4961
4962         return status;
4963 }
4964
4965 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4966                                  struct net_device *dev, u32 filter_mask,
4967                                  int nlflags)
4968 {
4969         struct be_adapter *adapter = netdev_priv(dev);
4970         int status = 0;
4971         u8 hsw_mode;
4972
4973         /* BE and Lancer chips support VEB mode only */
4974         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4975                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4976                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4977                         return 0;
4978                 hsw_mode = PORT_FWD_TYPE_VEB;
4979         } else {
4980                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4981                                                adapter->if_handle, &hsw_mode,
4982                                                NULL);
4983                 if (status)
4984                         return 0;
4985
4986                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4987                         return 0;
4988         }
4989
4990         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4991                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4992                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4993                                        0, 0, nlflags, filter_mask, NULL);
4994 }
4995
4996 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4997                                          void (*func)(struct work_struct *))
4998 {
4999         struct be_cmd_work *work;
5000
5001         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5002         if (!work) {
5003                 dev_err(&adapter->pdev->dev,
5004                         "be_work memory allocation failed\n");
5005                 return NULL;
5006         }
5007
5008         INIT_WORK(&work->work, func);
5009         work->adapter = adapter;
5010         return work;
5011 }
5012
5013 /* VxLAN offload Notes:
5014  *
5015  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5016  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5017  * is expected to work across all types of IP tunnels once exported. Skyhawk
5018  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5019  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5020  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5021  * those other tunnels are unexported on the fly through ndo_features_check().
5022  *
5023  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5024  * adds more than one port, disable offloads and don't re-enable them
5025  * until after all the tunnels are removed.
5026  */
5027 static void be_work_add_vxlan_port(struct work_struct *work)
5028 {
5029         struct be_cmd_work *cmd_work =
5030                                 container_of(work, struct be_cmd_work, work);
5031         struct be_adapter *adapter = cmd_work->adapter;
5032         struct net_device *netdev = adapter->netdev;
5033         struct device *dev = &adapter->pdev->dev;
5034         __be16 port = cmd_work->info.vxlan_port;
5035         int status;
5036
5037         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5038                 adapter->vxlan_port_aliases++;
5039                 goto done;
5040         }
5041
5042         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5043                 dev_info(dev,
5044                          "Only one UDP port supported for VxLAN offloads\n");
5045                 dev_info(dev, "Disabling VxLAN offloads\n");
5046                 adapter->vxlan_port_count++;
5047                 goto err;
5048         }
5049
5050         if (adapter->vxlan_port_count++ >= 1)
5051                 goto done;
5052
5053         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5054                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5055         if (status) {
5056                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5057                 goto err;
5058         }
5059
5060         status = be_cmd_set_vxlan_port(adapter, port);
5061         if (status) {
5062                 dev_warn(dev, "Failed to add VxLAN port\n");
5063                 goto err;
5064         }
5065         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5066         adapter->vxlan_port = port;
5067
5068         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5069                                    NETIF_F_TSO | NETIF_F_TSO6 |
5070                                    NETIF_F_GSO_UDP_TUNNEL;
5071         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5072         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5073
5074         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5075                  be16_to_cpu(port));
5076         goto done;
5077 err:
5078         be_disable_vxlan_offloads(adapter);
5079 done:
5080         kfree(cmd_work);
5081 }
5082
5083 static void be_work_del_vxlan_port(struct work_struct *work)
5084 {
5085         struct be_cmd_work *cmd_work =
5086                                 container_of(work, struct be_cmd_work, work);
5087         struct be_adapter *adapter = cmd_work->adapter;
5088         __be16 port = cmd_work->info.vxlan_port;
5089
5090         if (adapter->vxlan_port != port)
5091                 goto done;
5092
5093         if (adapter->vxlan_port_aliases) {
5094                 adapter->vxlan_port_aliases--;
5095                 goto out;
5096         }
5097
5098         be_disable_vxlan_offloads(adapter);
5099
5100         dev_info(&adapter->pdev->dev,
5101                  "Disabled VxLAN offloads for UDP port %d\n",
5102                  be16_to_cpu(port));
5103 done:
5104         adapter->vxlan_port_count--;
5105 out:
5106         kfree(cmd_work);
5107 }
5108
5109 static void be_cfg_vxlan_port(struct net_device *netdev,
5110                               struct udp_tunnel_info *ti,
5111                               void (*func)(struct work_struct *))
5112 {
5113         struct be_adapter *adapter = netdev_priv(netdev);
5114         struct be_cmd_work *cmd_work;
5115
5116         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5117                 return;
5118
5119         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5120                 return;
5121
5122         cmd_work = be_alloc_work(adapter, func);
5123         if (cmd_work) {
5124                 cmd_work->info.vxlan_port = ti->port;
5125                 queue_work(be_wq, &cmd_work->work);
5126         }
5127 }
5128
5129 static void be_del_vxlan_port(struct net_device *netdev,
5130                               struct udp_tunnel_info *ti)
5131 {
5132         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5133 }
5134
5135 static void be_add_vxlan_port(struct net_device *netdev,
5136                               struct udp_tunnel_info *ti)
5137 {
5138         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5139 }
5140
5141 static netdev_features_t be_features_check(struct sk_buff *skb,
5142                                            struct net_device *dev,
5143                                            netdev_features_t features)
5144 {
5145         struct be_adapter *adapter = netdev_priv(dev);
5146         u8 l4_hdr = 0;
5147
5148         /* The code below restricts offload features for some tunneled packets.
5149          * Offload features for normal (non-tunnel) packets are unchanged.
5150          */
5151         if (!skb->encapsulation ||
5152             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5153                 return features;
5154
5155         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5156          * should disable tunnel offload features if it's not a VxLAN packet,
5157          * as tunnel offloads have been enabled only for VxLAN. This is done to
5158          * allow other tunneled traffic like GRE to work fine while VxLAN
5159          * offloads are configured in Skyhawk-R.
5160          */
5161         switch (vlan_get_protocol(skb)) {
5162         case htons(ETH_P_IP):
5163                 l4_hdr = ip_hdr(skb)->protocol;
5164                 break;
5165         case htons(ETH_P_IPV6):
5166                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5167                 break;
5168         default:
5169                 return features;
5170         }
5171
5172         if (l4_hdr != IPPROTO_UDP ||
5173             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5174             skb->inner_protocol != htons(ETH_P_TEB) ||
5175             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5176             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
5177                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5178
5179         return features;
5180 }
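/* Illustration of the check above: a VxLAN-encapsulated skb (outer UDP, inner
 * Ethernet frame, i.e. inner_protocol == ETH_P_TEB, with a standard
 * udphdr + vxlanhdr tunnel header) keeps its checksum/GSO offloads, while e.g.
 * a GRE-encapsulated skb transmitted with VxLAN offloads enabled loses
 * NETIF_F_CSUM_MASK and NETIF_F_GSO_MASK and is checksummed/segmented in
 * software by the stack instead.
 */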
5181
5182 static int be_get_phys_port_id(struct net_device *dev,
5183                                struct netdev_phys_item_id *ppid)
5184 {
5185         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5186         struct be_adapter *adapter = netdev_priv(dev);
5187         u8 *id;
5188
5189         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5190                 return -ENOSPC;
5191
5192         ppid->id[0] = adapter->hba_port_num + 1;
5193         id = &ppid->id[1];
5194         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5195              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5196                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5197
5198         ppid->id_len = id_len;
5199
5200         return 0;
5201 }
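/* The ID built above is: byte 0 = hba_port_num + 1, followed by the
 * controller serial number copied one CNTL_SERIAL_NUM_WORD_SZ-sized word at a
 * time, starting from the last word. Two ports of the same controller thus
 * report IDs that differ only in the first byte.
 */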
5202
5203 static void be_set_rx_mode(struct net_device *dev)
5204 {
5205         struct be_adapter *adapter = netdev_priv(dev);
5206         struct be_cmd_work *work;
5207
5208         work = be_alloc_work(adapter, be_work_set_rx_mode);
5209         if (work)
5210                 queue_work(be_wq, &work->work);
5211 }
5212
5213 static const struct net_device_ops be_netdev_ops = {
5214         .ndo_open               = be_open,
5215         .ndo_stop               = be_close,
5216         .ndo_start_xmit         = be_xmit,
5217         .ndo_set_rx_mode        = be_set_rx_mode,
5218         .ndo_set_mac_address    = be_mac_addr_set,
5219         .ndo_change_mtu         = be_change_mtu,
5220         .ndo_get_stats64        = be_get_stats64,
5221         .ndo_validate_addr      = eth_validate_addr,
5222         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5223         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5224         .ndo_set_vf_mac         = be_set_vf_mac,
5225         .ndo_set_vf_vlan        = be_set_vf_vlan,
5226         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5227         .ndo_get_vf_config      = be_get_vf_config,
5228         .ndo_set_vf_link_state  = be_set_vf_link_state,
5229         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5230 #ifdef CONFIG_NET_POLL_CONTROLLER
5231         .ndo_poll_controller    = be_netpoll,
5232 #endif
5233         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5234         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5235 #ifdef CONFIG_NET_RX_BUSY_POLL
5236         .ndo_busy_poll          = be_busy_poll,
5237 #endif
5238         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5239         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5240         .ndo_features_check     = be_features_check,
5241         .ndo_get_phys_port_id   = be_get_phys_port_id,
5242 };
5243
5244 static void be_netdev_init(struct net_device *netdev)
5245 {
5246         struct be_adapter *adapter = netdev_priv(netdev);
5247
5248         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5249                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5250                 NETIF_F_HW_VLAN_CTAG_TX;
5251         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5252                 netdev->hw_features |= NETIF_F_RXHASH;
5253
5254         netdev->features |= netdev->hw_features |
5255                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5256
5257         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5258                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5259
5260         netdev->priv_flags |= IFF_UNICAST_FLT;
5261
5262         netdev->flags |= IFF_MULTICAST;
5263
5264         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5265
5266         netdev->netdev_ops = &be_netdev_ops;
5267
5268         netdev->ethtool_ops = &be_ethtool_ops;
5269 }
5270
5271 static void be_cleanup(struct be_adapter *adapter)
5272 {
5273         struct net_device *netdev = adapter->netdev;
5274
5275         rtnl_lock();
5276         netif_device_detach(netdev);
5277         if (netif_running(netdev))
5278                 be_close(netdev);
5279         rtnl_unlock();
5280
5281         be_clear(adapter);
5282 }
5283
5284 static int be_resume(struct be_adapter *adapter)
5285 {
5286         struct net_device *netdev = adapter->netdev;
5287         int status;
5288
5289         status = be_setup(adapter);
5290         if (status)
5291                 return status;
5292
5293         rtnl_lock();
5294         if (netif_running(netdev))
5295                 status = be_open(netdev);
5296         rtnl_unlock();
5297
5298         if (status)
5299                 return status;
5300
5301         netif_device_attach(netdev);
5302
5303         return 0;
5304 }
5305
5306 static void be_soft_reset(struct be_adapter *adapter)
5307 {
5308         u32 val;
5309
5310         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5311         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5312         val |= SLIPORT_SOFTRESET_SR_MASK;
5313         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5314 }
5315
5316 static bool be_err_is_recoverable(struct be_adapter *adapter)
5317 {
5318         struct be_error_recovery *err_rec = &adapter->error_recovery;
5319         unsigned long initial_idle_time =
5320                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5321         unsigned long recovery_interval =
5322                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5323         u16 ue_err_code;
5324         u32 val;
5325
5326         val = be_POST_stage_get(adapter);
5327         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5328                 return false;
5329         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5330         if (ue_err_code == 0)
5331                 return false;
5332
5333         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5334                 ue_err_code);
5335
5336         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5337                 dev_err(&adapter->pdev->dev,
5338                         "Cannot recover within %lu sec from driver load\n",
5339                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5340                 return false;
5341         }
5342
5343         if (err_rec->last_recovery_time &&
5344             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5345                 dev_err(&adapter->pdev->dev,
5346                         "Cannot recover within %lu sec from last recovery\n",
5347                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5348                 return false;
5349         }
5350
5351         if (ue_err_code == err_rec->last_err_code) {
5352                 dev_err(&adapter->pdev->dev,
5353                         "Cannot recover from a consecutive TPE error\n");
5354                 return false;
5355         }
5356
5357         err_rec->last_recovery_time = jiffies;
5358         err_rec->last_err_code = ue_err_code;
5359         return true;
5360 }
5361
5362 static int be_tpe_recover(struct be_adapter *adapter)
5363 {
5364         struct be_error_recovery *err_rec = &adapter->error_recovery;
5365         int status = -EAGAIN;
5366         u32 val;
5367
5368         switch (err_rec->recovery_state) {
5369         case ERR_RECOVERY_ST_NONE:
5370                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5371                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5372                 break;
5373
5374         case ERR_RECOVERY_ST_DETECT:
5375                 val = be_POST_stage_get(adapter);
5376                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5377                     POST_STAGE_RECOVERABLE_ERR) {
5378                         dev_err(&adapter->pdev->dev,
5379                                 "Unrecoverable HW error detected: 0x%x\n", val);
5380                         status = -EINVAL;
5381                         err_rec->resched_delay = 0;
5382                         break;
5383                 }
5384
5385                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5386
5387                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5388                  * milliseconds before it checks for final error status in
5389                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5390                  * If they are, PF0 initiates a Soft Reset.
5391                  */
5392                 if (adapter->pf_num == 0) {
5393                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5394                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5395                                         ERR_RECOVERY_UE_DETECT_DURATION;
5396                         break;
5397                 }
5398
5399                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5400                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5401                                         ERR_RECOVERY_UE_DETECT_DURATION;
5402                 break;
5403
5404         case ERR_RECOVERY_ST_RESET:
5405                 if (!be_err_is_recoverable(adapter)) {
5406                         dev_err(&adapter->pdev->dev,
5407                                 "Failed to meet recovery criteria\n");
5408                         status = -EIO;
5409                         err_rec->resched_delay = 0;
5410                         break;
5411                 }
5412                 be_soft_reset(adapter);
5413                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5414                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5415                                         err_rec->ue_to_reset_time;
5416                 break;
5417
5418         case ERR_RECOVERY_ST_PRE_POLL:
5419                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5420                 err_rec->resched_delay = 0;
5421                 status = 0;                     /* done */
5422                 break;
5423
5424         default:
5425                 status = -EINVAL;
5426                 err_rec->resched_delay = 0;
5427                 break;
5428         }
5429
5430         return status;
5431 }
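/* State walk implemented above (each transition is one scheduled run of the
 * error-detection work):
 *   PF0:       NONE -> DETECT -> RESET -> PRE_POLL -> REINIT
 *   other PFs: NONE -> DETECT -> PRE_POLL -> REINIT
 * -EAGAIN is returned while the walk is still in progress, 0 once the
 * PRE_POLL wait completes and re-initialization may proceed.
 */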
5432
5433 static int be_err_recover(struct be_adapter *adapter)
5434 {
5435         int status;
5436
5437         if (!lancer_chip(adapter)) {
5438                 if (!adapter->error_recovery.recovery_supported ||
5439                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5440                         return -EIO;
5441                 status = be_tpe_recover(adapter);
5442                 if (status)
5443                         goto err;
5444         }
5445
5446         /* Wait for adapter to reach quiescent state before
5447          * destroying queues
5448          */
5449         status = be_fw_wait_ready(adapter);
5450         if (status)
5451                 goto err;
5452
5453         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5454
5455         be_cleanup(adapter);
5456
5457         status = be_resume(adapter);
5458         if (status)
5459                 goto err;
5460
5461         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5462
5463 err:
5464         return status;
5465 }
5466
5467 static void be_err_detection_task(struct work_struct *work)
5468 {
5469         struct be_error_recovery *err_rec =
5470                         container_of(work, struct be_error_recovery,
5471                                      err_detection_work.work);
5472         struct be_adapter *adapter =
5473                         container_of(err_rec, struct be_adapter,
5474                                      error_recovery);
5475         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5476         struct device *dev = &adapter->pdev->dev;
5477         int recovery_status;
5478
5479         be_detect_error(adapter);
5480         if (!be_check_error(adapter, BE_ERROR_HW))
5481                 goto reschedule_task;
5482
5483         recovery_status = be_err_recover(adapter);
5484         if (!recovery_status) {
5485                 err_rec->recovery_retries = 0;
5486                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5487                 dev_info(dev, "Adapter recovery successful\n");
5488                 goto reschedule_task;
5489         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5490                 /* BEx/SH recovery state machine */
5491                 if (adapter->pf_num == 0 &&
5492                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5493                         dev_err(&adapter->pdev->dev,
5494                                 "Adapter recovery in progress\n");
5495                 resched_delay = err_rec->resched_delay;
5496                 goto reschedule_task;
5497         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5498                 /* For VFs, check if the PF has allocated resources
5499                  * every second.
5500                  */
5501                 dev_err(dev, "Re-trying adapter recovery\n");
5502                 goto reschedule_task;
5503         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5504                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5505                 /* In case of another error during recovery, it takes 30 sec
5506                  * for adapter to come out of error. Retry error recovery after
5507                  * this time interval.
5508                  */
5509                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5510                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5511                 goto reschedule_task;
5512         } else {
5513                 dev_err(dev, "Adapter recovery failed\n");
5514                 dev_err(dev, "Please reboot server to recover\n");
5515         }
5516
5517         return;
5518
5519 reschedule_task:
5520         be_schedule_err_detection(adapter, resched_delay);
5521 }
5522
5523 static void be_log_sfp_info(struct be_adapter *adapter)
5524 {
5525         int status;
5526
5527         status = be_cmd_query_sfp_info(adapter);
5528         if (!status) {
5529                 dev_err(&adapter->pdev->dev,
5530                         "Port %c: %s Vendor: %s part no: %s",
5531                         adapter->port_name,
5532                         be_misconfig_evt_port_state[adapter->phy_state],
5533                         adapter->phy.vendor_name,
5534                         adapter->phy.vendor_pn);
5535         }
5536         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5537 }
5538
5539 static void be_worker(struct work_struct *work)
5540 {
5541         struct be_adapter *adapter =
5542                 container_of(work, struct be_adapter, work.work);
5543         struct be_rx_obj *rxo;
5544         int i;
5545
5546         if (be_physfn(adapter) &&
5547             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5548                 be_cmd_get_die_temperature(adapter);
5549
5550         /* when interrupts are not yet enabled, just reap any pending
5551          * mcc completions
5552          */
5553         if (!netif_running(adapter->netdev)) {
5554                 local_bh_disable();
5555                 be_process_mcc(adapter);
5556                 local_bh_enable();
5557                 goto reschedule;
5558         }
5559
5560         if (!adapter->stats_cmd_sent) {
5561                 if (lancer_chip(adapter))
5562                         lancer_cmd_get_pport_stats(adapter,
5563                                                    &adapter->stats_cmd);
5564                 else
5565                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5566         }
5567
5568         for_all_rx_queues(adapter, rxo, i) {
5569                 /* Replenish RX-queues starved due to memory
5570                  * allocation failures.
5571                  */
5572                 if (rxo->rx_post_starved)
5573                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5574         }
5575
5576         /* EQ-delay update for Skyhawk is done while notifying EQ */
5577         if (!skyhawk_chip(adapter))
5578                 be_eqd_update(adapter, false);
5579
5580         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5581                 be_log_sfp_info(adapter);
5582
5583 reschedule:
5584         adapter->work_counter++;
5585         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5586 }
5587
5588 static void be_unmap_pci_bars(struct be_adapter *adapter)
5589 {
5590         if (adapter->csr)
5591                 pci_iounmap(adapter->pdev, adapter->csr);
5592         if (adapter->db)
5593                 pci_iounmap(adapter->pdev, adapter->db);
5594         if (adapter->pcicfg && adapter->pcicfg_mapped)
5595                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5596 }
5597
5598 static int db_bar(struct be_adapter *adapter)
5599 {
5600         if (lancer_chip(adapter) || be_virtfn(adapter))
5601                 return 0;
5602         else
5603                 return 4;
5604 }
5605
5606 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5607 {
5608         if (skyhawk_chip(adapter)) {
5609                 adapter->roce_db.size = 4096;
5610                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5611                                                               db_bar(adapter));
5612                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5613                                                                db_bar(adapter));
5614         }
5615         return 0;
5616 }
5617
5618 static int be_map_pci_bars(struct be_adapter *adapter)
5619 {
5620         struct pci_dev *pdev = adapter->pdev;
5621         u8 __iomem *addr;
5622         u32 sli_intf;
5623
5624         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5625         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5626                                 SLI_INTF_FAMILY_SHIFT;
5627         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5628
5629         if (BEx_chip(adapter) && be_physfn(adapter)) {
5630                 adapter->csr = pci_iomap(pdev, 2, 0);
5631                 if (!adapter->csr)
5632                         return -ENOMEM;
5633         }
5634
5635         addr = pci_iomap(pdev, db_bar(adapter), 0);
5636         if (!addr)
5637                 goto pci_map_err;
5638         adapter->db = addr;
5639
5640         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5641                 if (be_physfn(adapter)) {
5642                         /* PCICFG is the 2nd BAR in BE2 */
5643                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5644                         if (!addr)
5645                                 goto pci_map_err;
5646                         adapter->pcicfg = addr;
5647                         adapter->pcicfg_mapped = true;
5648                 } else {
5649                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5650                         adapter->pcicfg_mapped = false;
5651                 }
5652         }
5653
5654         be_roce_map_pci_bars(adapter);
5655         return 0;
5656
5657 pci_map_err:
5658         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5659         be_unmap_pci_bars(adapter);
5660         return -ENOMEM;
5661 }
5662
5663 static void be_drv_cleanup(struct be_adapter *adapter)
5664 {
5665         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5666         struct device *dev = &adapter->pdev->dev;
5667
5668         if (mem->va)
5669                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5670
5671         mem = &adapter->rx_filter;
5672         if (mem->va)
5673                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5674
5675         mem = &adapter->stats_cmd;
5676         if (mem->va)
5677                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5678 }
5679
5680 /* Allocate and initialize various fields in be_adapter struct */
5681 static int be_drv_init(struct be_adapter *adapter)
5682 {
5683         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5684         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5685         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5686         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5687         struct device *dev = &adapter->pdev->dev;
5688         int status = 0;
5689
5690         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
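        /* Over-allocate the mailbox by 16 bytes so that the va/dma pair
         * actually used for mailbox commands can be placed on a 16-byte
         * boundary with PTR_ALIGN() below.
         */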
5691         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5692                                                  &mbox_mem_alloc->dma,
5693                                                  GFP_KERNEL);
5694         if (!mbox_mem_alloc->va)
5695                 return -ENOMEM;
5696
5697         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5698         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5699         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5700
5701         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5702         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5703                                             &rx_filter->dma, GFP_KERNEL);
5704         if (!rx_filter->va) {
5705                 status = -ENOMEM;
5706                 goto free_mbox;
5707         }
5708
5709         if (lancer_chip(adapter))
5710                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5711         else if (BE2_chip(adapter))
5712                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5713         else if (BE3_chip(adapter))
5714                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5715         else
5716                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5717         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5718                                             &stats_cmd->dma, GFP_KERNEL);
5719         if (!stats_cmd->va) {
5720                 status = -ENOMEM;
5721                 goto free_rx_filter;
5722         }
5723
5724         mutex_init(&adapter->mbox_lock);
5725         mutex_init(&adapter->mcc_lock);
5726         mutex_init(&adapter->rx_filter_lock);
5727         spin_lock_init(&adapter->mcc_cq_lock);
5728         init_completion(&adapter->et_cmd_compl);
5729
5730         pci_save_state(adapter->pdev);
5731
5732         INIT_DELAYED_WORK(&adapter->work, be_worker);
5733
5734         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5735         adapter->error_recovery.resched_delay = 0;
5736         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5737                           be_err_detection_task);
5738
5739         adapter->rx_fc = true;
5740         adapter->tx_fc = true;
5741
5742         /* Must be a power of 2, or else the MODULO() macro will BUG_ON */
5743         adapter->be_get_temp_freq = 64;
5744
5745         return 0;
5746
5747 free_rx_filter:
5748         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5749 free_mbox:
5750         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5751                           mbox_mem_alloc->dma);
5752         return status;
5753 }
5754
5755 static void be_remove(struct pci_dev *pdev)
5756 {
5757         struct be_adapter *adapter = pci_get_drvdata(pdev);
5758
5759         if (!adapter)
5760                 return;
5761
5762         be_roce_dev_remove(adapter);
5763         be_intr_set(adapter, false);
5764
5765         be_cancel_err_detection(adapter);
5766
5767         unregister_netdev(adapter->netdev);
5768
5769         be_clear(adapter);
5770
5771         if (!pci_vfs_assigned(adapter->pdev))
5772                 be_cmd_reset_function(adapter);
5773
5774         /* Tell the FW we're done issuing commands */
5775         be_cmd_fw_clean(adapter);
5776
5777         be_unmap_pci_bars(adapter);
5778         be_drv_cleanup(adapter);
5779
5780         pci_disable_pcie_error_reporting(pdev);
5781
5782         pci_release_regions(pdev);
5783         pci_disable_device(pdev);
5784
5785         free_netdev(adapter->netdev);
5786 }
5787
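     /* hwmon "temp1_input" handler: report the cached on-die temperature in
      * millidegrees Celsius, or -EIO if no valid reading is available yet.
      */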
5788 static ssize_t be_hwmon_show_temp(struct device *dev,
5789                                   struct device_attribute *dev_attr,
5790                                   char *buf)
5791 {
5792         struct be_adapter *adapter = dev_get_drvdata(dev);
5793
5794         /* Unit: millidegree Celsius */
5795         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5796                 return -EIO;
5797         else
5798                 return sprintf(buf, "%u\n",
5799                                adapter->hwmon_info.be_on_die_temp * 1000);
5800 }
5801
5802 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5803                           be_hwmon_show_temp, NULL, 1);
5804
5805 static struct attribute *be_hwmon_attrs[] = {
5806         &sensor_dev_attr_temp1_input.dev_attr.attr,
5807         NULL
5808 };
5809
5810 ATTRIBUTE_GROUPS(be_hwmon);
5811
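     /* Human-readable multi-channel mode name, used in the probe banner */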
5812 static char *mc_name(struct be_adapter *adapter)
5813 {
5814         char *str = ""; /* default */
5815
5816         switch (adapter->mc_type) {
5817         case UMC:
5818                 str = "UMC";
5819                 break;
5820         case FLEX10:
5821                 str = "FLEX10";
5822                 break;
5823         case vNIC1:
5824                 str = "vNIC-1";
5825                 break;
5826         case nPAR:
5827                 str = "nPAR";
5828                 break;
5829         case UFP:
5830                 str = "UFP";
5831                 break;
5832         case vNIC2:
5833                 str = "vNIC-2";
5834                 break;
5835         default:
5836                 str = "";
5837         }
5838
5839         return str;
5840 }
5841
5842 static inline char *func_name(struct be_adapter *adapter)
5843 {
5844         return be_physfn(adapter) ? "PF" : "VF";
5845 }
5846
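     /* Map the PCI device ID to the adapter family name used in log messages */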
5847 static inline char *nic_name(struct pci_dev *pdev)
5848 {
5849         switch (pdev->device) {
5850         case OC_DEVICE_ID1:
5851                 return OC_NAME;
5852         case OC_DEVICE_ID2:
5853                 return OC_NAME_BE;
5854         case OC_DEVICE_ID3:
5855         case OC_DEVICE_ID4:
5856                 return OC_NAME_LANCER;
5857         case BE_DEVICE_ID2:
5858                 return BE3_NAME;
5859         case OC_DEVICE_ID5:
5860         case OC_DEVICE_ID6:
5861                 return OC_NAME_SH;
5862         default:
5863                 return BE_NAME;
5864         }
5865 }
5866
5867 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5868 {
5869         struct be_adapter *adapter;
5870         struct net_device *netdev;
5871         int status = 0;
5872
5873         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5874
5875         status = pci_enable_device(pdev);
5876         if (status)
5877                 goto do_none;
5878
5879         status = pci_request_regions(pdev, DRV_NAME);
5880         if (status)
5881                 goto disable_dev;
5882         pci_set_master(pdev);
5883
5884         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5885         if (!netdev) {
5886                 status = -ENOMEM;
5887                 goto rel_reg;
5888         }
5889         adapter = netdev_priv(netdev);
5890         adapter->pdev = pdev;
5891         pci_set_drvdata(pdev, adapter);
5892         adapter->netdev = netdev;
5893         SET_NETDEV_DEV(netdev, &pdev->dev);
5894
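             /* Prefer 64-bit DMA; fall back to a 32-bit mask if the platform
              * cannot provide it.
              */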
5895         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5896         if (!status) {
5897                 netdev->features |= NETIF_F_HIGHDMA;
5898         } else {
5899                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5900                 if (status) {
5901                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5902                         goto free_netdev;
5903                 }
5904         }
5905
5906         status = pci_enable_pcie_error_reporting(pdev);
5907         if (!status)
5908                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5909
5910         status = be_map_pci_bars(adapter);
5911         if (status)
5912                 goto free_netdev;
5913
5914         status = be_drv_init(adapter);
5915         if (status)
5916                 goto unmap_bars;
5917
5918         status = be_setup(adapter);
5919         if (status)
5920                 goto drv_cleanup;
5921
5922         be_netdev_init(netdev);
5923         status = register_netdev(netdev);
5924         if (status != 0)
5925                 goto unsetup;
5926
5927         be_roce_dev_add(adapter);
5928
5929         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5930         adapter->error_recovery.probe_time = jiffies;
5931
5932         /* On-die temperature is not supported on VFs */
5933         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5934                 adapter->hwmon_info.hwmon_dev =
5935                         devm_hwmon_device_register_with_groups(&pdev->dev,
5936                                                                DRV_NAME,
5937                                                                adapter,
5938                                                                be_hwmon_groups);
5939                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5940         }
5941
5942         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5943                  func_name(adapter), mc_name(adapter), adapter->port_name);
5944
5945         return 0;
5946
5947 unsetup:
5948         be_clear(adapter);
5949 drv_cleanup:
5950         be_drv_cleanup(adapter);
5951 unmap_bars:
5952         be_unmap_pci_bars(adapter);
5953 free_netdev:
5954         free_netdev(netdev);
5955 rel_reg:
5956         pci_release_regions(pdev);
5957 disable_dev:
5958         pci_disable_device(pdev);
5959 do_none:
5960         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5961         return status;
5962 }
5963
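     /* Legacy PCI PM hook: quiesce the interface and put the device into the
      * requested low-power state.
      */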
5964 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5965 {
5966         struct be_adapter *adapter = pci_get_drvdata(pdev);
5967
5968         be_intr_set(adapter, false);
5969         be_cancel_err_detection(adapter);
5970
5971         be_cleanup(adapter);
5972
5973         pci_save_state(pdev);
5974         pci_disable_device(pdev);
5975         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5976         return 0;
5977 }
5978
5979 static int be_pci_resume(struct pci_dev *pdev)
5980 {
5981         struct be_adapter *adapter = pci_get_drvdata(pdev);
5982         int status = 0;
5983
5984         status = pci_enable_device(pdev);
5985         if (status)
5986                 return status;
5987
5988         pci_restore_state(pdev);
5989
5990         status = be_resume(adapter);
5991         if (status)
5992                 return status;
5993
5994         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5995
5996         return 0;
5997 }
5998
5999 /* An FLR will stop BE from DMAing any data. */
6002 static void be_shutdown(struct pci_dev *pdev)
6003 {
6004         struct be_adapter *adapter = pci_get_drvdata(pdev);
6005
6006         if (!adapter)
6007                 return;
6008
6009         be_roce_dev_shutdown(adapter);
6010         cancel_delayed_work_sync(&adapter->work);
6011         be_cancel_err_detection(adapter);
6012
6013         netif_device_detach(adapter->netdev);
6014
6015         be_cmd_reset_function(adapter);
6016
6017         pci_disable_device(pdev);
6018 }
6019
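     /* EEH/AER error_detected callback: mark the adapter as being in an EEH
      * error state, quiesce it and tell the PCI core whether a slot reset is
      * needed.
      */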
6020 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6021                                             pci_channel_state_t state)
6022 {
6023         struct be_adapter *adapter = pci_get_drvdata(pdev);
6024
6025         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6026
6027         be_roce_dev_remove(adapter);
6028
6029         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6030                 be_set_error(adapter, BE_ERROR_EEH);
6031
6032                 be_cancel_err_detection(adapter);
6033
6034                 be_cleanup(adapter);
6035         }
6036
6037         if (state == pci_channel_io_perm_failure)
6038                 return PCI_ERS_RESULT_DISCONNECT;
6039
6040         pci_disable_device(pdev);
6041
6042         /* The error could cause the FW to trigger a flash debug dump.
6043          * Resetting the card while the flash dump is in progress can
6044          * cause it not to recover; wait for the dump to finish.
6045          * Only the first function needs to wait, since one wait per
6046          * adapter is enough.
6047          */
6048         if (pdev->devfn == 0)
6049                 ssleep(30);
6050
6051         return PCI_ERS_RESULT_NEED_RESET;
6052 }
6053
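     /* EEH slot_reset callback: re-enable the device and wait for the FW to
      * become ready before reporting the slot as recovered.
      */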
6054 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6055 {
6056         struct be_adapter *adapter = pci_get_drvdata(pdev);
6057         int status;
6058
6059         dev_info(&adapter->pdev->dev, "EEH reset\n");
6060
6061         status = pci_enable_device(pdev);
6062         if (status)
6063                 return PCI_ERS_RESULT_DISCONNECT;
6064
6065         pci_set_master(pdev);
6066         pci_restore_state(pdev);
6067
6068         /* Check if card is ok and fw is ready */
6069         dev_info(&adapter->pdev->dev,
6070                  "Waiting for FW to be ready after EEH reset\n");
6071         status = be_fw_wait_ready(adapter);
6072         if (status)
6073                 return PCI_ERS_RESULT_DISCONNECT;
6074
6075         pci_cleanup_aer_uncorrect_error_status(pdev);
6076         be_clear_error(adapter, BE_CLEAR_ALL);
6077         return PCI_ERS_RESULT_RECOVERED;
6078 }
6079
6080 static void be_eeh_resume(struct pci_dev *pdev)
6081 {
6082         int status = 0;
6083         struct be_adapter *adapter = pci_get_drvdata(pdev);
6084
6085         dev_info(&adapter->pdev->dev, "EEH resume\n");
6086
6087         pci_save_state(pdev);
6088
6089         status = be_resume(adapter);
6090         if (status)
6091                 goto err;
6092
6093         be_roce_dev_add(adapter);
6094
6095         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6096         return;
6097 err:
6098         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6099 }
6100
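     /* Backs the sriov_numvfs sysfs attribute: enables or disables SR-IOV VFs
      * and redistributes the PF-pool resources accordingly.
      */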
6101 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6102 {
6103         struct be_adapter *adapter = pci_get_drvdata(pdev);
6104         struct be_resources vft_res = {0};
6105         int status;
6106
6107         if (!num_vfs)
6108                 be_vf_clear(adapter);
6109
6110         adapter->num_vfs = num_vfs;
6111
6112         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6113                 dev_warn(&pdev->dev,
6114                          "Cannot disable VFs while they are assigned\n");
6115                 return -EBUSY;
6116         }
6117
6118         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6119          * resources are distributed equally across the maximum number of
6120          * VFs. The user may request that only a subset of the max VFs be
6121          * enabled; in that case redistribute the resources across num_vfs
6122          * so that each VF gets access to more resources.
6123          * BE3 FW does not provide this facility; on Lancer the FW does
6124          * this redistribution itself.
6125          */
6126         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6127                 be_calculate_vf_res(adapter, adapter->num_vfs,
6128                                     &vft_res);
6129                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6130                                                  adapter->num_vfs, &vft_res);
6131                 if (status)
6132                         dev_err(&pdev->dev,
6133                                 "Failed to optimize SR-IOV resources\n");
6134         }
6135
6136         status = be_get_resources(adapter);
6137         if (status)
6138                 return be_cmd_status(status);
6139
6140         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6141         rtnl_lock();
6142         status = be_update_queues(adapter);
6143         rtnl_unlock();
6144         if (status)
6145                 return be_cmd_status(status);
6146
6147         if (adapter->num_vfs)
6148                 status = be_vf_setup(adapter);
6149
6150         if (!status)
6151                 return adapter->num_vfs;
6152
6153         return 0;
6154 }
6155
6156 static const struct pci_error_handlers be_eeh_handlers = {
6157         .error_detected = be_eeh_err_detected,
6158         .slot_reset = be_eeh_reset,
6159         .resume = be_eeh_resume,
6160 };
6161
6162 static struct pci_driver be_driver = {
6163         .name = DRV_NAME,
6164         .id_table = be_dev_ids,
6165         .probe = be_probe,
6166         .remove = be_remove,
6167         .suspend = be_suspend,
6168         .resume = be_pci_resume,
6169         .shutdown = be_shutdown,
6170         .sriov_configure = be_pci_sriov_configure,
6171         .err_handler = &be_eeh_handlers
6172 };
6173
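     /* Module init: validate module parameters, create the shared workqueues
      * and register the PCI driver.
      */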
6174 static int __init be_init_module(void)
6175 {
6176         int status;
6177
6178         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6179             rx_frag_size != 2048) {
6180                 printk(KERN_WARNING DRV_NAME
6181                         " : Module param rx_frag_size must be 2048/4096/8192."
6182                         " Using 2048\n");
6183                 rx_frag_size = 2048;
6184         }
6185
6186         if (num_vfs > 0) {
6187                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6188                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6189         }
6190
6191         be_wq = create_singlethread_workqueue("be_wq");
6192         if (!be_wq) {
6193                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6194                 return -ENOMEM;
6195         }
6196
6197         be_err_recovery_workq =
6198                 create_singlethread_workqueue("be_err_recover");
6199         if (!be_err_recovery_workq)
6200                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6201
6202         status = pci_register_driver(&be_driver);
6203         if (status) {
6204                 destroy_workqueue(be_wq);
6205                 be_destroy_err_recovery_workq();
6206         }
6207         return status;
6208 }
6209 module_init(be_init_module);
6210
6211 static void __exit be_exit_module(void)
6212 {
6213         pci_unregister_driver(&be_driver);
6214
6215         be_destroy_err_recovery_workq();
6216
6217         if (be_wq)
6218                 destroy_workqueue(be_wq);
6219 }
6220 module_exit(be_exit_module);