Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[cascardo/linux.git] / drivers / net / ethernet / emulex / benet / be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for defering cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On lancer interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
279                                      mac)) {
280                         /* mac already added, skip addition */
281                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
282                         return 0;
283                 }
284         }
285
286         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
287                                &adapter->pmac_id[0], 0);
288 }
289
290 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
291 {
292         int i;
293
294         /* Skip deletion if the programmed mac is
295          * being used in uc-list
296          */
297         for (i = 0; i < adapter->uc_macs; i++) {
298                 if (adapter->pmac_id[i + 1] == pmac_id)
299                         return;
300         }
301         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
302 }
303
304 static int be_mac_addr_set(struct net_device *netdev, void *p)
305 {
306         struct be_adapter *adapter = netdev_priv(netdev);
307         struct device *dev = &adapter->pdev->dev;
308         struct sockaddr *addr = p;
309         int status;
310         u8 mac[ETH_ALEN];
311         u32 old_pmac_id = adapter->pmac_id[0];
312
313         if (!is_valid_ether_addr(addr->sa_data))
314                 return -EADDRNOTAVAIL;
315
316         /* Proceed further only if, User provided MAC is different
317          * from active MAC
318          */
319         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
320                 return 0;
321
322         /* if device is not running, copy MAC to netdev->dev_addr */
323         if (!netif_running(netdev))
324                 goto done;
325
326         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
327          * privilege or if PF did not provision the new MAC address.
328          * On BE3, this cmd will always fail if the VF doesn't have the
329          * FILTMGMT privilege. This failure is OK, only if the PF programmed
330          * the MAC for the VF.
331          */
332         mutex_lock(&adapter->rx_filter_lock);
333         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
334         if (!status) {
335
336                 /* Delete the old programmed MAC. This call may fail if the
337                  * old MAC was already deleted by the PF driver.
338                  */
339                 if (adapter->pmac_id[0] != old_pmac_id)
340                         be_dev_mac_del(adapter, old_pmac_id);
341         }
342
343         mutex_unlock(&adapter->rx_filter_lock);
344         /* Decide if the new MAC is successfully activated only after
345          * querying the FW
346          */
347         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
348                                        adapter->if_handle, true, 0);
349         if (status)
350                 goto err;
351
352         /* The MAC change did not happen, either due to lack of privilege
353          * or PF didn't pre-provision.
354          */
355         if (!ether_addr_equal(addr->sa_data, mac)) {
356                 status = -EPERM;
357                 goto err;
358         }
359 done:
360         ether_addr_copy(adapter->dev_mac, addr->sa_data);
361         ether_addr_copy(netdev->dev_addr, addr->sa_data);
362         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
363         return 0;
364 err:
365         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
366         return status;
367 }
368
369 /* BE2 supports only v0 cmd */
370 static void *hw_stats_from_cmd(struct be_adapter *adapter)
371 {
372         if (BE2_chip(adapter)) {
373                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
374
375                 return &cmd->hw_stats;
376         } else if (BE3_chip(adapter)) {
377                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
378
379                 return &cmd->hw_stats;
380         } else {
381                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         }
385 }
386
387 /* BE2 supports only v0 cmd */
388 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
389 {
390         if (BE2_chip(adapter)) {
391                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
392
393                 return &hw_stats->erx;
394         } else if (BE3_chip(adapter)) {
395                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
396
397                 return &hw_stats->erx;
398         } else {
399                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         }
403 }
404
405 static void populate_be_v0_stats(struct be_adapter *adapter)
406 {
407         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
409         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
410         struct be_port_rxf_stats_v0 *port_stats =
411                                         &rxf_stats->port[adapter->port_num];
412         struct be_drv_stats *drvs = &adapter->drv_stats;
413
414         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
415         drvs->rx_pause_frames = port_stats->rx_pause_frames;
416         drvs->rx_crc_errors = port_stats->rx_crc_errors;
417         drvs->rx_control_frames = port_stats->rx_control_frames;
418         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
419         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
420         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
421         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
422         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
423         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
424         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
425         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
426         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
427         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
428         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
429         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
430         drvs->rx_dropped_header_too_small =
431                 port_stats->rx_dropped_header_too_small;
432         drvs->rx_address_filtered =
433                                         port_stats->rx_address_filtered +
434                                         port_stats->rx_vlan_filtered;
435         drvs->rx_alignment_symbol_errors =
436                 port_stats->rx_alignment_symbol_errors;
437
438         drvs->tx_pauseframes = port_stats->tx_pauseframes;
439         drvs->tx_controlframes = port_stats->tx_controlframes;
440
441         if (adapter->port_num)
442                 drvs->jabber_events = rxf_stats->port1_jabber_events;
443         else
444                 drvs->jabber_events = rxf_stats->port0_jabber_events;
445         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
446         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
447         drvs->forwarded_packets = rxf_stats->forwarded_packets;
448         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
449         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
450         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
451         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
452 }
453
454 static void populate_be_v1_stats(struct be_adapter *adapter)
455 {
456         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
457         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
458         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
459         struct be_port_rxf_stats_v1 *port_stats =
460                                         &rxf_stats->port[adapter->port_num];
461         struct be_drv_stats *drvs = &adapter->drv_stats;
462
463         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
464         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
465         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
466         drvs->rx_pause_frames = port_stats->rx_pause_frames;
467         drvs->rx_crc_errors = port_stats->rx_crc_errors;
468         drvs->rx_control_frames = port_stats->rx_control_frames;
469         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
470         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
471         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
472         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
473         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
474         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
475         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
476         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
477         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
478         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
479         drvs->rx_dropped_header_too_small =
480                 port_stats->rx_dropped_header_too_small;
481         drvs->rx_input_fifo_overflow_drop =
482                 port_stats->rx_input_fifo_overflow_drop;
483         drvs->rx_address_filtered = port_stats->rx_address_filtered;
484         drvs->rx_alignment_symbol_errors =
485                 port_stats->rx_alignment_symbol_errors;
486         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
487         drvs->tx_pauseframes = port_stats->tx_pauseframes;
488         drvs->tx_controlframes = port_stats->tx_controlframes;
489         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
490         drvs->jabber_events = port_stats->jabber_events;
491         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
492         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
493         drvs->forwarded_packets = rxf_stats->forwarded_packets;
494         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
495         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
496         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
497         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
498 }
499
500 static void populate_be_v2_stats(struct be_adapter *adapter)
501 {
502         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
503         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
504         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
505         struct be_port_rxf_stats_v2 *port_stats =
506                                         &rxf_stats->port[adapter->port_num];
507         struct be_drv_stats *drvs = &adapter->drv_stats;
508
509         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
510         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
511         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
512         drvs->rx_pause_frames = port_stats->rx_pause_frames;
513         drvs->rx_crc_errors = port_stats->rx_crc_errors;
514         drvs->rx_control_frames = port_stats->rx_control_frames;
515         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
516         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
517         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
518         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
519         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
520         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
521         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
522         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
523         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
524         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
525         drvs->rx_dropped_header_too_small =
526                 port_stats->rx_dropped_header_too_small;
527         drvs->rx_input_fifo_overflow_drop =
528                 port_stats->rx_input_fifo_overflow_drop;
529         drvs->rx_address_filtered = port_stats->rx_address_filtered;
530         drvs->rx_alignment_symbol_errors =
531                 port_stats->rx_alignment_symbol_errors;
532         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
533         drvs->tx_pauseframes = port_stats->tx_pauseframes;
534         drvs->tx_controlframes = port_stats->tx_controlframes;
535         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
536         drvs->jabber_events = port_stats->jabber_events;
537         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
538         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
539         drvs->forwarded_packets = rxf_stats->forwarded_packets;
540         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
541         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
542         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
543         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
544         if (be_roce_supported(adapter)) {
545                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
546                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
547                 drvs->rx_roce_frames = port_stats->roce_frames_received;
548                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
549                 drvs->roce_drops_payload_len =
550                         port_stats->roce_drops_payload_len;
551         }
552 }
553
554 static void populate_lancer_stats(struct be_adapter *adapter)
555 {
556         struct be_drv_stats *drvs = &adapter->drv_stats;
557         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
558
559         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
560         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
561         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
562         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
563         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
564         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
565         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
566         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
567         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
568         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
569         drvs->rx_dropped_tcp_length =
570                                 pport_stats->rx_dropped_invalid_tcp_length;
571         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
572         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
573         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
574         drvs->rx_dropped_header_too_small =
575                                 pport_stats->rx_dropped_header_too_small;
576         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
577         drvs->rx_address_filtered =
578                                         pport_stats->rx_address_filtered +
579                                         pport_stats->rx_vlan_filtered;
580         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
581         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
582         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
583         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
584         drvs->jabber_events = pport_stats->rx_jabbers;
585         drvs->forwarded_packets = pport_stats->num_forwards_lo;
586         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
587         drvs->rx_drops_too_many_frags =
588                                 pport_stats->rx_drops_too_many_frags_lo;
589 }
590
591 static void accumulate_16bit_val(u32 *acc, u16 val)
592 {
593 #define lo(x)                   (x & 0xFFFF)
594 #define hi(x)                   (x & 0xFFFF0000)
595         bool wrapped = val < lo(*acc);
596         u32 newacc = hi(*acc) + val;
597
598         if (wrapped)
599                 newacc += 65536;
600         ACCESS_ONCE(*acc) = newacc;
601 }
602
603 static void populate_erx_stats(struct be_adapter *adapter,
604                                struct be_rx_obj *rxo, u32 erx_stat)
605 {
606         if (!BEx_chip(adapter))
607                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
608         else
609                 /* below erx HW counter can actually wrap around after
610                  * 65535. Driver accumulates a 32-bit value
611                  */
612                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
613                                      (u16)erx_stat);
614 }
615
616 void be_parse_stats(struct be_adapter *adapter)
617 {
618         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
619         struct be_rx_obj *rxo;
620         int i;
621         u32 erx_stat;
622
623         if (lancer_chip(adapter)) {
624                 populate_lancer_stats(adapter);
625         } else {
626                 if (BE2_chip(adapter))
627                         populate_be_v0_stats(adapter);
628                 else if (BE3_chip(adapter))
629                         /* for BE3 */
630                         populate_be_v1_stats(adapter);
631                 else
632                         populate_be_v2_stats(adapter);
633
634                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
635                 for_all_rx_queues(adapter, rxo, i) {
636                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
637                         populate_erx_stats(adapter, rxo, erx_stat);
638                 }
639         }
640 }
641
642 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
643                                                 struct rtnl_link_stats64 *stats)
644 {
645         struct be_adapter *adapter = netdev_priv(netdev);
646         struct be_drv_stats *drvs = &adapter->drv_stats;
647         struct be_rx_obj *rxo;
648         struct be_tx_obj *txo;
649         u64 pkts, bytes;
650         unsigned int start;
651         int i;
652
653         for_all_rx_queues(adapter, rxo, i) {
654                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
655
656                 do {
657                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
658                         pkts = rx_stats(rxo)->rx_pkts;
659                         bytes = rx_stats(rxo)->rx_bytes;
660                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
661                 stats->rx_packets += pkts;
662                 stats->rx_bytes += bytes;
663                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
664                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
665                                         rx_stats(rxo)->rx_drops_no_frags;
666         }
667
668         for_all_tx_queues(adapter, txo, i) {
669                 const struct be_tx_stats *tx_stats = tx_stats(txo);
670
671                 do {
672                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
673                         pkts = tx_stats(txo)->tx_pkts;
674                         bytes = tx_stats(txo)->tx_bytes;
675                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
676                 stats->tx_packets += pkts;
677                 stats->tx_bytes += bytes;
678         }
679
680         /* bad pkts received */
681         stats->rx_errors = drvs->rx_crc_errors +
682                 drvs->rx_alignment_symbol_errors +
683                 drvs->rx_in_range_errors +
684                 drvs->rx_out_range_errors +
685                 drvs->rx_frame_too_long +
686                 drvs->rx_dropped_too_small +
687                 drvs->rx_dropped_too_short +
688                 drvs->rx_dropped_header_too_small +
689                 drvs->rx_dropped_tcp_length +
690                 drvs->rx_dropped_runt;
691
692         /* detailed rx errors */
693         stats->rx_length_errors = drvs->rx_in_range_errors +
694                 drvs->rx_out_range_errors +
695                 drvs->rx_frame_too_long;
696
697         stats->rx_crc_errors = drvs->rx_crc_errors;
698
699         /* frame alignment errors */
700         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
701
702         /* receiver fifo overrun */
703         /* drops_no_pbuf is no per i/f, it's per BE card */
704         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
705                                 drvs->rx_input_fifo_overflow_drop +
706                                 drvs->rx_drops_no_pbuf;
707         return stats;
708 }
709
710 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
711 {
712         struct net_device *netdev = adapter->netdev;
713
714         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
715                 netif_carrier_off(netdev);
716                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
717         }
718
719         if (link_status)
720                 netif_carrier_on(netdev);
721         else
722                 netif_carrier_off(netdev);
723
724         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
725 }
726
727 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
728 {
729         struct be_tx_stats *stats = tx_stats(txo);
730         u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
731
732         u64_stats_update_begin(&stats->sync);
733         stats->tx_reqs++;
734         stats->tx_bytes += skb->len;
735         stats->tx_pkts += tx_pkts;
736         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
737                 stats->tx_vxlan_offload_pkts += tx_pkts;
738         u64_stats_update_end(&stats->sync);
739 }
740
741 /* Returns number of WRBs needed for the skb */
742 static u32 skb_wrb_cnt(struct sk_buff *skb)
743 {
744         /* +1 for the header wrb */
745         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
746 }
747
748 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
749 {
750         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
751         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
752         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
753         wrb->rsvd0 = 0;
754 }
755
756 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
757  * to avoid the swap and shift/mask operations in wrb_fill().
758  */
759 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
760 {
761         wrb->frag_pa_hi = 0;
762         wrb->frag_pa_lo = 0;
763         wrb->frag_len = 0;
764         wrb->rsvd0 = 0;
765 }
766
767 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
768                                      struct sk_buff *skb)
769 {
770         u8 vlan_prio;
771         u16 vlan_tag;
772
773         vlan_tag = skb_vlan_tag_get(skb);
774         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
775         /* If vlan priority provided by OS is NOT in available bmap */
776         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
777                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
778                                 adapter->recommended_prio_bits;
779
780         return vlan_tag;
781 }
782
783 /* Used only for IP tunnel packets */
784 static u16 skb_inner_ip_proto(struct sk_buff *skb)
785 {
786         return (inner_ip_hdr(skb)->version == 4) ?
787                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
788 }
789
790 static u16 skb_ip_proto(struct sk_buff *skb)
791 {
792         return (ip_hdr(skb)->version == 4) ?
793                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
794 }
795
796 static inline bool be_is_txq_full(struct be_tx_obj *txo)
797 {
798         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
799 }
800
801 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
802 {
803         return atomic_read(&txo->q.used) < txo->q.len / 2;
804 }
805
806 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
807 {
808         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
809 }
810
811 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
812                                        struct sk_buff *skb,
813                                        struct be_wrb_params *wrb_params)
814 {
815         u16 proto;
816
817         if (skb_is_gso(skb)) {
818                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
819                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
820                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
821                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
822         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
823                 if (skb->encapsulation) {
824                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
825                         proto = skb_inner_ip_proto(skb);
826                 } else {
827                         proto = skb_ip_proto(skb);
828                 }
829                 if (proto == IPPROTO_TCP)
830                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
831                 else if (proto == IPPROTO_UDP)
832                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
833         }
834
835         if (skb_vlan_tag_present(skb)) {
836                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
837                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
838         }
839
840         BE_WRB_F_SET(wrb_params->features, CRC, 1);
841 }
842
843 static void wrb_fill_hdr(struct be_adapter *adapter,
844                          struct be_eth_hdr_wrb *hdr,
845                          struct be_wrb_params *wrb_params,
846                          struct sk_buff *skb)
847 {
848         memset(hdr, 0, sizeof(*hdr));
849
850         SET_TX_WRB_HDR_BITS(crc, hdr,
851                             BE_WRB_F_GET(wrb_params->features, CRC));
852         SET_TX_WRB_HDR_BITS(ipcs, hdr,
853                             BE_WRB_F_GET(wrb_params->features, IPCS));
854         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
855                             BE_WRB_F_GET(wrb_params->features, TCPCS));
856         SET_TX_WRB_HDR_BITS(udpcs, hdr,
857                             BE_WRB_F_GET(wrb_params->features, UDPCS));
858
859         SET_TX_WRB_HDR_BITS(lso, hdr,
860                             BE_WRB_F_GET(wrb_params->features, LSO));
861         SET_TX_WRB_HDR_BITS(lso6, hdr,
862                             BE_WRB_F_GET(wrb_params->features, LSO6));
863         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
864
865         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
866          * hack is not needed, the evt bit is set while ringing DB.
867          */
868         SET_TX_WRB_HDR_BITS(event, hdr,
869                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
870         SET_TX_WRB_HDR_BITS(vlan, hdr,
871                             BE_WRB_F_GET(wrb_params->features, VLAN));
872         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
873
874         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
875         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
876         SET_TX_WRB_HDR_BITS(mgmt, hdr,
877                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
878 }
879
880 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
881                           bool unmap_single)
882 {
883         dma_addr_t dma;
884         u32 frag_len = le32_to_cpu(wrb->frag_len);
885
886
887         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
888                 (u64)le32_to_cpu(wrb->frag_pa_lo);
889         if (frag_len) {
890                 if (unmap_single)
891                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
892                 else
893                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
894         }
895 }
896
897 /* Grab a WRB header for xmit */
898 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
899 {
900         u32 head = txo->q.head;
901
902         queue_head_inc(&txo->q);
903         return head;
904 }
905
906 /* Set up the WRB header for xmit */
907 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
908                                 struct be_tx_obj *txo,
909                                 struct be_wrb_params *wrb_params,
910                                 struct sk_buff *skb, u16 head)
911 {
912         u32 num_frags = skb_wrb_cnt(skb);
913         struct be_queue_info *txq = &txo->q;
914         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
915
916         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
917         be_dws_cpu_to_le(hdr, sizeof(*hdr));
918
919         BUG_ON(txo->sent_skb_list[head]);
920         txo->sent_skb_list[head] = skb;
921         txo->last_req_hdr = head;
922         atomic_add(num_frags, &txq->used);
923         txo->last_req_wrb_cnt = num_frags;
924         txo->pend_wrb_cnt += num_frags;
925 }
926
927 /* Setup a WRB fragment (buffer descriptor) for xmit */
928 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
929                                  int len)
930 {
931         struct be_eth_wrb *wrb;
932         struct be_queue_info *txq = &txo->q;
933
934         wrb = queue_head_node(txq);
935         wrb_fill(wrb, busaddr, len);
936         queue_head_inc(txq);
937 }
938
939 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
940  * was invoked. The producer index is restored to the previous packet and the
941  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
942  */
943 static void be_xmit_restore(struct be_adapter *adapter,
944                             struct be_tx_obj *txo, u32 head, bool map_single,
945                             u32 copied)
946 {
947         struct device *dev;
948         struct be_eth_wrb *wrb;
949         struct be_queue_info *txq = &txo->q;
950
951         dev = &adapter->pdev->dev;
952         txq->head = head;
953
954         /* skip the first wrb (hdr); it's not mapped */
955         queue_head_inc(txq);
956         while (copied) {
957                 wrb = queue_head_node(txq);
958                 unmap_tx_frag(dev, wrb, map_single);
959                 map_single = false;
960                 copied -= le32_to_cpu(wrb->frag_len);
961                 queue_head_inc(txq);
962         }
963
964         txq->head = head;
965 }
966
967 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
968  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
969  * of WRBs used up by the packet.
970  */
971 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
972                            struct sk_buff *skb,
973                            struct be_wrb_params *wrb_params)
974 {
975         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
976         struct device *dev = &adapter->pdev->dev;
977         struct be_queue_info *txq = &txo->q;
978         bool map_single = false;
979         u32 head = txq->head;
980         dma_addr_t busaddr;
981         int len;
982
983         head = be_tx_get_wrb_hdr(txo);
984
985         if (skb->len > skb->data_len) {
986                 len = skb_headlen(skb);
987
988                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
989                 if (dma_mapping_error(dev, busaddr))
990                         goto dma_err;
991                 map_single = true;
992                 be_tx_setup_wrb_frag(txo, busaddr, len);
993                 copied += len;
994         }
995
996         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
997                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
998                 len = skb_frag_size(frag);
999
1000                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1001                 if (dma_mapping_error(dev, busaddr))
1002                         goto dma_err;
1003                 be_tx_setup_wrb_frag(txo, busaddr, len);
1004                 copied += len;
1005         }
1006
1007         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1008
1009         be_tx_stats_update(txo, skb);
1010         return wrb_cnt;
1011
1012 dma_err:
1013         adapter->drv_stats.dma_map_errors++;
1014         be_xmit_restore(adapter, txo, head, map_single, copied);
1015         return 0;
1016 }
1017
1018 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1019 {
1020         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1021 }
1022
1023 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1024                                              struct sk_buff *skb,
1025                                              struct be_wrb_params
1026                                              *wrb_params)
1027 {
1028         u16 vlan_tag = 0;
1029
1030         skb = skb_share_check(skb, GFP_ATOMIC);
1031         if (unlikely(!skb))
1032                 return skb;
1033
1034         if (skb_vlan_tag_present(skb))
1035                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1036
1037         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1038                 if (!vlan_tag)
1039                         vlan_tag = adapter->pvid;
1040                 /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1041                  * skip VLAN insertion
1042                  */
1043                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1044         }
1045
1046         if (vlan_tag) {
1047                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1048                                                 vlan_tag);
1049                 if (unlikely(!skb))
1050                         return skb;
1051                 skb->vlan_tci = 0;
1052         }
1053
1054         /* Insert the outer VLAN, if any */
1055         if (adapter->qnq_vid) {
1056                 vlan_tag = adapter->qnq_vid;
1057                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1058                                                 vlan_tag);
1059                 if (unlikely(!skb))
1060                         return skb;
1061                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1062         }
1063
1064         return skb;
1065 }
1066
1067 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1068 {
1069         struct ethhdr *eh = (struct ethhdr *)skb->data;
1070         u16 offset = ETH_HLEN;
1071
1072         if (eh->h_proto == htons(ETH_P_IPV6)) {
1073                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1074
1075                 offset += sizeof(struct ipv6hdr);
1076                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1077                     ip6h->nexthdr != NEXTHDR_UDP) {
1078                         struct ipv6_opt_hdr *ehdr =
1079                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1080
1081                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1082                         if (ehdr->hdrlen == 0xff)
1083                                 return true;
1084                 }
1085         }
1086         return false;
1087 }
1088
1089 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1090 {
1091         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1092 }
1093
1094 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1095 {
1096         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1097 }
1098
1099 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1100                                                   struct sk_buff *skb,
1101                                                   struct be_wrb_params
1102                                                   *wrb_params)
1103 {
1104         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1105         unsigned int eth_hdr_len;
1106         struct iphdr *ip;
1107
1108         /* For padded packets, BE HW modifies tot_len field in IP header
1109          * incorrecly when VLAN tag is inserted by HW.
1110          * For padded packets, Lancer computes incorrect checksum.
1111          */
1112         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1113                                                 VLAN_ETH_HLEN : ETH_HLEN;
1114         if (skb->len <= 60 &&
1115             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1116             is_ipv4_pkt(skb)) {
1117                 ip = (struct iphdr *)ip_hdr(skb);
1118                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1119         }
1120
1121         /* If vlan tag is already inlined in the packet, skip HW VLAN
1122          * tagging in pvid-tagging mode
1123          */
1124         if (be_pvid_tagging_enabled(adapter) &&
1125             veh->h_vlan_proto == htons(ETH_P_8021Q))
1126                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1127
1128         /* HW has a bug wherein it will calculate CSUM for VLAN
1129          * pkts even though it is disabled.
1130          * Manually insert VLAN in pkt.
1131          */
1132         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1133             skb_vlan_tag_present(skb)) {
1134                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1135                 if (unlikely(!skb))
1136                         goto err;
1137         }
1138
1139         /* HW may lockup when VLAN HW tagging is requested on
1140          * certain ipv6 packets. Drop such pkts if the HW workaround to
1141          * skip HW tagging is not enabled by FW.
1142          */
1143         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1144                      (adapter->pvid || adapter->qnq_vid) &&
1145                      !qnq_async_evt_rcvd(adapter)))
1146                 goto tx_drop;
1147
1148         /* Manual VLAN tag insertion to prevent:
1149          * ASIC lockup when the ASIC inserts VLAN tag into
1150          * certain ipv6 packets. Insert VLAN tags in driver,
1151          * and set event, completion, vlan bits accordingly
1152          * in the Tx WRB.
1153          */
1154         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1155             be_vlan_tag_tx_chk(adapter, skb)) {
1156                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1157                 if (unlikely(!skb))
1158                         goto err;
1159         }
1160
1161         return skb;
1162 tx_drop:
1163         dev_kfree_skb_any(skb);
1164 err:
1165         return NULL;
1166 }
1167
1168 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1169                                            struct sk_buff *skb,
1170                                            struct be_wrb_params *wrb_params)
1171 {
1172         int err;
1173
1174         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1175          * packets that are 32b or less may cause a transmit stall
1176          * on that port. The workaround is to pad such packets
1177          * (len <= 32 bytes) to a minimum length of 36b.
1178          */
1179         if (skb->len <= 32) {
1180                 if (skb_put_padto(skb, 36))
1181                         return NULL;
1182         }
1183
1184         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1185                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1186                 if (!skb)
1187                         return NULL;
1188         }
1189
1190         /* The stack can send us skbs with length greater than
1191          * what the HW can handle. Trim the extra bytes.
1192          */
1193         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1194         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1195         WARN_ON(err);
1196
1197         return skb;
1198 }
1199
1200 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1201 {
1202         struct be_queue_info *txq = &txo->q;
1203         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1204
1205         /* Mark the last request eventable if it hasn't been marked already */
1206         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1207                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1208
1209         /* compose a dummy wrb if there are odd set of wrbs to notify */
1210         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1211                 wrb_fill_dummy(queue_head_node(txq));
1212                 queue_head_inc(txq);
1213                 atomic_inc(&txq->used);
1214                 txo->pend_wrb_cnt++;
1215                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1216                                            TX_HDR_WRB_NUM_SHIFT);
1217                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1218                                           TX_HDR_WRB_NUM_SHIFT);
1219         }
1220         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1221         txo->pend_wrb_cnt = 0;
1222 }
1223
1224 /* OS2BMC related */
1225
1226 #define DHCP_CLIENT_PORT        68
1227 #define DHCP_SERVER_PORT        67
1228 #define NET_BIOS_PORT1          137
1229 #define NET_BIOS_PORT2          138
1230 #define DHCPV6_RAS_PORT         547
1231
1232 #define is_mc_allowed_on_bmc(adapter, eh)       \
1233         (!is_multicast_filt_enabled(adapter) && \
1234          is_multicast_ether_addr(eh->h_dest) && \
1235          !is_broadcast_ether_addr(eh->h_dest))
1236
1237 #define is_bc_allowed_on_bmc(adapter, eh)       \
1238         (!is_broadcast_filt_enabled(adapter) && \
1239          is_broadcast_ether_addr(eh->h_dest))
1240
1241 #define is_arp_allowed_on_bmc(adapter, skb)     \
1242         (is_arp(skb) && is_arp_filt_enabled(adapter))
1243
1244 #define is_broadcast_packet(eh, adapter)        \
1245                 (is_multicast_ether_addr(eh->h_dest) && \
1246                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1247
1248 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1249
1250 #define is_arp_filt_enabled(adapter)    \
1251                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1252
1253 #define is_dhcp_client_filt_enabled(adapter)    \
1254                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1255
1256 #define is_dhcp_srvr_filt_enabled(adapter)      \
1257                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1258
1259 #define is_nbios_filt_enabled(adapter)  \
1260                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1261
1262 #define is_ipv6_na_filt_enabled(adapter)        \
1263                 (adapter->bmc_filt_mask &       \
1264                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1265
1266 #define is_ipv6_ra_filt_enabled(adapter)        \
1267                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1268
1269 #define is_ipv6_ras_filt_enabled(adapter)       \
1270                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1271
1272 #define is_broadcast_filt_enabled(adapter)      \
1273                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1274
1275 #define is_multicast_filt_enabled(adapter)      \
1276                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1277
1278 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1279                                struct sk_buff **skb)
1280 {
1281         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1282         bool os2bmc = false;
1283
1284         if (!be_is_os2bmc_enabled(adapter))
1285                 goto done;
1286
1287         if (!is_multicast_ether_addr(eh->h_dest))
1288                 goto done;
1289
1290         if (is_mc_allowed_on_bmc(adapter, eh) ||
1291             is_bc_allowed_on_bmc(adapter, eh) ||
1292             is_arp_allowed_on_bmc(adapter, (*skb))) {
1293                 os2bmc = true;
1294                 goto done;
1295         }
1296
1297         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1298                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1299                 u8 nexthdr = hdr->nexthdr;
1300
1301                 if (nexthdr == IPPROTO_ICMPV6) {
1302                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1303
1304                         switch (icmp6->icmp6_type) {
1305                         case NDISC_ROUTER_ADVERTISEMENT:
1306                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1307                                 goto done;
1308                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1309                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1310                                 goto done;
1311                         default:
1312                                 break;
1313                         }
1314                 }
1315         }
1316
1317         if (is_udp_pkt((*skb))) {
1318                 struct udphdr *udp = udp_hdr((*skb));
1319
1320                 switch (ntohs(udp->dest)) {
1321                 case DHCP_CLIENT_PORT:
1322                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1323                         goto done;
1324                 case DHCP_SERVER_PORT:
1325                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1326                         goto done;
1327                 case NET_BIOS_PORT1:
1328                 case NET_BIOS_PORT2:
1329                         os2bmc = is_nbios_filt_enabled(adapter);
1330                         goto done;
1331                 case DHCPV6_RAS_PORT:
1332                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1333                         goto done;
1334                 default:
1335                         break;
1336                 }
1337         }
1338 done:
1339         /* For packets over a vlan, which are destined
1340          * to BMC, asic expects the vlan to be inline in the packet.
1341          */
1342         if (os2bmc)
1343                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1344
1345         return os2bmc;
1346 }
1347
1348 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1349 {
1350         struct be_adapter *adapter = netdev_priv(netdev);
1351         u16 q_idx = skb_get_queue_mapping(skb);
1352         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1353         struct be_wrb_params wrb_params = { 0 };
1354         bool flush = !skb->xmit_more;
1355         u16 wrb_cnt;
1356
1357         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1358         if (unlikely(!skb))
1359                 goto drop;
1360
1361         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1362
1363         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1364         if (unlikely(!wrb_cnt)) {
1365                 dev_kfree_skb_any(skb);
1366                 goto drop;
1367         }
1368
1369         /* if os2bmc is enabled and if the pkt is destined to bmc,
1370          * enqueue the pkt a 2nd time with mgmt bit set.
1371          */
1372         if (be_send_pkt_to_bmc(adapter, &skb)) {
1373                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1374                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1375                 if (unlikely(!wrb_cnt))
1376                         goto drop;
1377                 else
1378                         skb_get(skb);
1379         }
1380
1381         if (be_is_txq_full(txo)) {
1382                 netif_stop_subqueue(netdev, q_idx);
1383                 tx_stats(txo)->tx_stops++;
1384         }
1385
1386         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1387                 be_xmit_flush(adapter, txo);
1388
1389         return NETDEV_TX_OK;
1390 drop:
1391         tx_stats(txo)->tx_drv_drops++;
1392         /* Flush the already enqueued tx requests */
1393         if (flush && txo->pend_wrb_cnt)
1394                 be_xmit_flush(adapter, txo);
1395
1396         return NETDEV_TX_OK;
1397 }
1398
1399 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1400 {
1401         struct be_adapter *adapter = netdev_priv(netdev);
1402         struct device *dev = &adapter->pdev->dev;
1403
1404         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1405                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1406                          BE_MIN_MTU, BE_MAX_MTU);
1407                 return -EINVAL;
1408         }
1409
1410         dev_info(dev, "MTU changed from %d to %d bytes\n",
1411                  netdev->mtu, new_mtu);
1412         netdev->mtu = new_mtu;
1413         return 0;
1414 }
1415
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424         struct device *dev = &adapter->pdev->dev;
1425         int status;
1426
1427         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428                 return 0;
1429
1430         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431         if (!status) {
1432                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434         } else {
1435                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436         }
1437         return status;
1438 }
1439
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442         struct device *dev = &adapter->pdev->dev;
1443         int status;
1444
1445         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446         if (!status) {
1447                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1448                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449         }
1450         return status;
1451 }
1452
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459         struct device *dev = &adapter->pdev->dev;
1460         u16 vids[BE_NUM_VLANS_SUPPORTED];
1461         u16 num = 0, i = 0;
1462         int status = 0;
1463
1464         /* No need to change the VLAN state if the I/F is in promiscuous */
1465         if (adapter->netdev->flags & IFF_PROMISC)
1466                 return 0;
1467
1468         if (adapter->vlans_added > be_max_vlans(adapter))
1469                 return be_set_vlan_promisc(adapter);
1470
1471         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472                 status = be_clear_vlan_promisc(adapter);
1473                 if (status)
1474                         return status;
1475         }
1476         /* Construct VLAN Table to give to HW */
1477         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478                 vids[num++] = cpu_to_le16(i);
1479
1480         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481         if (status) {
1482                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1483                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1484                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485                     addl_status(status) ==
1486                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487                         return be_set_vlan_promisc(adapter);
1488         }
1489         return status;
1490 }
1491
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494         struct be_adapter *adapter = netdev_priv(netdev);
1495         int status = 0;
1496
1497         mutex_lock(&adapter->rx_filter_lock);
1498
1499         /* Packets with VID 0 are always received by Lancer by default */
1500         if (lancer_chip(adapter) && vid == 0)
1501                 goto done;
1502
1503         if (test_bit(vid, adapter->vids))
1504                 goto done;
1505
1506         set_bit(vid, adapter->vids);
1507         adapter->vlans_added++;
1508
1509         status = be_vid_config(adapter);
1510 done:
1511         mutex_unlock(&adapter->rx_filter_lock);
1512         return status;
1513 }
1514
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517         struct be_adapter *adapter = netdev_priv(netdev);
1518         int status = 0;
1519
1520         mutex_lock(&adapter->rx_filter_lock);
1521
1522         /* Packets with VID 0 are always received by Lancer by default */
1523         if (lancer_chip(adapter) && vid == 0)
1524                 goto done;
1525
1526         if (!test_bit(vid, adapter->vids))
1527                 goto done;
1528
1529         clear_bit(vid, adapter->vids);
1530         adapter->vlans_added--;
1531
1532         status = be_vid_config(adapter);
1533 done:
1534         mutex_unlock(&adapter->rx_filter_lock);
1535         return status;
1536 }
1537
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546         int status;
1547
1548         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549                 return;
1550
1551         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552         if (!status)
1553                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558         int status;
1559
1560         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561                 return;
1562
1563         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564         if (!status)
1565                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570         int status;
1571
1572         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573                 return;
1574
1575         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576         if (!status)
1577                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579
1580 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1581  * We use a single callback function for both sync and unsync. We really don't
1582  * add/remove addresses through this callback. But, we use it to detect changes
1583  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586                              const unsigned char *addr)
1587 {
1588         struct be_adapter *adapter = netdev_priv(netdev);
1589
1590         adapter->update_uc_list = true;
1591         return 0;
1592 }
1593
1594 static int be_mc_list_update(struct net_device *netdev,
1595                              const unsigned char *addr)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598
1599         adapter->update_mc_list = true;
1600         return 0;
1601 }
1602
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605         struct net_device *netdev = adapter->netdev;
1606         struct netdev_hw_addr *ha;
1607         bool mc_promisc = false;
1608         int status;
1609
1610         netif_addr_lock_bh(netdev);
1611         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612
1613         if (netdev->flags & IFF_PROMISC) {
1614                 adapter->update_mc_list = false;
1615         } else if (netdev->flags & IFF_ALLMULTI ||
1616                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617                 /* Enable multicast promisc if num configured exceeds
1618                  * what we support
1619                  */
1620                 mc_promisc = true;
1621                 adapter->update_mc_list = false;
1622         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623                 /* Update mc-list unconditionally if the iface was previously
1624                  * in mc-promisc mode and now is out of that mode.
1625                  */
1626                 adapter->update_mc_list = true;
1627         }
1628
1629         if (adapter->update_mc_list) {
1630                 int i = 0;
1631
1632                 /* cache the mc-list in adapter */
1633                 netdev_for_each_mc_addr(ha, netdev) {
1634                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635                         i++;
1636                 }
1637                 adapter->mc_count = netdev_mc_count(netdev);
1638         }
1639         netif_addr_unlock_bh(netdev);
1640
1641         if (mc_promisc) {
1642                 be_set_mc_promisc(adapter);
1643         } else if (adapter->update_mc_list) {
1644                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645                 if (!status)
1646                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647                 else
1648                         be_set_mc_promisc(adapter);
1649
1650                 adapter->update_mc_list = false;
1651         }
1652 }
1653
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656         struct net_device *netdev = adapter->netdev;
1657
1658         __dev_mc_unsync(netdev, NULL);
1659         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660         adapter->mc_count = 0;
1661 }
1662
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665         if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1666                              adapter->dev_mac)) {
1667                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1668                 return 0;
1669         }
1670
1671         return be_cmd_pmac_add(adapter,
1672                                (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
1673                                adapter->if_handle,
1674                                &adapter->pmac_id[uc_idx + 1], 0);
1675 }
1676
1677 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1678 {
1679         if (pmac_id == adapter->pmac_id[0])
1680                 return;
1681
1682         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1683 }
1684
1685 static void be_set_uc_list(struct be_adapter *adapter)
1686 {
1687         struct net_device *netdev = adapter->netdev;
1688         struct netdev_hw_addr *ha;
1689         bool uc_promisc = false;
1690         int curr_uc_macs = 0, i;
1691
1692         netif_addr_lock_bh(netdev);
1693         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1694
1695         if (netdev->flags & IFF_PROMISC) {
1696                 adapter->update_uc_list = false;
1697         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1698                 uc_promisc = true;
1699                 adapter->update_uc_list = false;
1700         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1701                 /* Update uc-list unconditionally if the iface was previously
1702                  * in uc-promisc mode and now is out of that mode.
1703                  */
1704                 adapter->update_uc_list = true;
1705         }
1706
1707         if (adapter->update_uc_list) {
1708                 i = 1; /* First slot is claimed by the Primary MAC */
1709
1710                 /* cache the uc-list in adapter array */
1711                 netdev_for_each_uc_addr(ha, netdev) {
1712                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1713                         i++;
1714                 }
1715                 curr_uc_macs = netdev_uc_count(netdev);
1716         }
1717         netif_addr_unlock_bh(netdev);
1718
1719         if (uc_promisc) {
1720                 be_set_uc_promisc(adapter);
1721         } else if (adapter->update_uc_list) {
1722                 be_clear_uc_promisc(adapter);
1723
1724                 for (i = 0; i < adapter->uc_macs; i++)
1725                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1726
1727                 for (i = 0; i < curr_uc_macs; i++)
1728                         be_uc_mac_add(adapter, i);
1729                 adapter->uc_macs = curr_uc_macs;
1730                 adapter->update_uc_list = false;
1731         }
1732 }
1733
1734 static void be_clear_uc_list(struct be_adapter *adapter)
1735 {
1736         struct net_device *netdev = adapter->netdev;
1737         int i;
1738
1739         __dev_uc_unsync(netdev, NULL);
1740         for (i = 0; i < adapter->uc_macs; i++)
1741                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1742
1743         adapter->uc_macs = 0;
1744 }
1745
1746 static void __be_set_rx_mode(struct be_adapter *adapter)
1747 {
1748         struct net_device *netdev = adapter->netdev;
1749
1750         mutex_lock(&adapter->rx_filter_lock);
1751
1752         if (netdev->flags & IFF_PROMISC) {
1753                 if (!be_in_all_promisc(adapter))
1754                         be_set_all_promisc(adapter);
1755         } else if (be_in_all_promisc(adapter)) {
1756                 /* We need to re-program the vlan-list or clear
1757                  * vlan-promisc mode (if needed) when the interface
1758                  * comes out of promisc mode.
1759                  */
1760                 be_vid_config(adapter);
1761         }
1762
1763         be_set_uc_list(adapter);
1764         be_set_mc_list(adapter);
1765
1766         mutex_unlock(&adapter->rx_filter_lock);
1767 }
1768
1769 static void be_work_set_rx_mode(struct work_struct *work)
1770 {
1771         struct be_cmd_work *cmd_work =
1772                                 container_of(work, struct be_cmd_work, work);
1773
1774         __be_set_rx_mode(cmd_work->adapter);
1775         kfree(cmd_work);
1776 }
1777
1778 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1779 {
1780         struct be_adapter *adapter = netdev_priv(netdev);
1781         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1782         int status;
1783
1784         if (!sriov_enabled(adapter))
1785                 return -EPERM;
1786
1787         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1788                 return -EINVAL;
1789
1790         /* Proceed further only if user provided MAC is different
1791          * from active MAC
1792          */
1793         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1794                 return 0;
1795
1796         if (BEx_chip(adapter)) {
1797                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1798                                 vf + 1);
1799
1800                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1801                                          &vf_cfg->pmac_id, vf + 1);
1802         } else {
1803                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1804                                         vf + 1);
1805         }
1806
1807         if (status) {
1808                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1809                         mac, vf, status);
1810                 return be_cmd_status(status);
1811         }
1812
1813         ether_addr_copy(vf_cfg->mac_addr, mac);
1814
1815         return 0;
1816 }
1817
1818 static int be_get_vf_config(struct net_device *netdev, int vf,
1819                             struct ifla_vf_info *vi)
1820 {
1821         struct be_adapter *adapter = netdev_priv(netdev);
1822         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1823
1824         if (!sriov_enabled(adapter))
1825                 return -EPERM;
1826
1827         if (vf >= adapter->num_vfs)
1828                 return -EINVAL;
1829
1830         vi->vf = vf;
1831         vi->max_tx_rate = vf_cfg->tx_rate;
1832         vi->min_tx_rate = 0;
1833         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1834         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1835         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1836         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1837         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1838
1839         return 0;
1840 }
1841
1842 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1843 {
1844         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1845         u16 vids[BE_NUM_VLANS_SUPPORTED];
1846         int vf_if_id = vf_cfg->if_handle;
1847         int status;
1848
1849         /* Enable Transparent VLAN Tagging */
1850         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1851         if (status)
1852                 return status;
1853
1854         /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1855         vids[0] = 0;
1856         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1857         if (!status)
1858                 dev_info(&adapter->pdev->dev,
1859                          "Cleared guest VLANs on VF%d", vf);
1860
1861         /* After TVT is enabled, disallow VFs to program VLAN filters */
1862         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1863                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1864                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1865                 if (!status)
1866                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1867         }
1868         return 0;
1869 }
1870
1871 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1872 {
1873         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1874         struct device *dev = &adapter->pdev->dev;
1875         int status;
1876
1877         /* Reset Transparent VLAN Tagging. */
1878         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1879                                        vf_cfg->if_handle, 0, 0);
1880         if (status)
1881                 return status;
1882
1883         /* Allow VFs to program VLAN filtering */
1884         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1885                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1886                                                   BE_PRIV_FILTMGMT, vf + 1);
1887                 if (!status) {
1888                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1889                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1890                 }
1891         }
1892
1893         dev_info(dev,
1894                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1895         return 0;
1896 }
1897
1898 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1899                           __be16 vlan_proto)
1900 {
1901         struct be_adapter *adapter = netdev_priv(netdev);
1902         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1903         int status;
1904
1905         if (!sriov_enabled(adapter))
1906                 return -EPERM;
1907
1908         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1909                 return -EINVAL;
1910
1911         if (vlan_proto != htons(ETH_P_8021Q))
1912                 return -EPROTONOSUPPORT;
1913
1914         if (vlan || qos) {
1915                 vlan |= qos << VLAN_PRIO_SHIFT;
1916                 status = be_set_vf_tvt(adapter, vf, vlan);
1917         } else {
1918                 status = be_clear_vf_tvt(adapter, vf);
1919         }
1920
1921         if (status) {
1922                 dev_err(&adapter->pdev->dev,
1923                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1924                         status);
1925                 return be_cmd_status(status);
1926         }
1927
1928         vf_cfg->vlan_tag = vlan;
1929         return 0;
1930 }
1931
1932 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1933                              int min_tx_rate, int max_tx_rate)
1934 {
1935         struct be_adapter *adapter = netdev_priv(netdev);
1936         struct device *dev = &adapter->pdev->dev;
1937         int percent_rate, status = 0;
1938         u16 link_speed = 0;
1939         u8 link_status;
1940
1941         if (!sriov_enabled(adapter))
1942                 return -EPERM;
1943
1944         if (vf >= adapter->num_vfs)
1945                 return -EINVAL;
1946
1947         if (min_tx_rate)
1948                 return -EINVAL;
1949
1950         if (!max_tx_rate)
1951                 goto config_qos;
1952
1953         status = be_cmd_link_status_query(adapter, &link_speed,
1954                                           &link_status, 0);
1955         if (status)
1956                 goto err;
1957
1958         if (!link_status) {
1959                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1960                 status = -ENETDOWN;
1961                 goto err;
1962         }
1963
1964         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1965                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1966                         link_speed);
1967                 status = -EINVAL;
1968                 goto err;
1969         }
1970
1971         /* On Skyhawk the QOS setting must be done only as a % value */
1972         percent_rate = link_speed / 100;
1973         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1974                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1975                         percent_rate);
1976                 status = -EINVAL;
1977                 goto err;
1978         }
1979
1980 config_qos:
1981         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1982         if (status)
1983                 goto err;
1984
1985         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1986         return 0;
1987
1988 err:
1989         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1990                 max_tx_rate, vf);
1991         return be_cmd_status(status);
1992 }
1993
1994 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1995                                 int link_state)
1996 {
1997         struct be_adapter *adapter = netdev_priv(netdev);
1998         int status;
1999
2000         if (!sriov_enabled(adapter))
2001                 return -EPERM;
2002
2003         if (vf >= adapter->num_vfs)
2004                 return -EINVAL;
2005
2006         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2007         if (status) {
2008                 dev_err(&adapter->pdev->dev,
2009                         "Link state change on VF %d failed: %#x\n", vf, status);
2010                 return be_cmd_status(status);
2011         }
2012
2013         adapter->vf_cfg[vf].plink_tracking = link_state;
2014
2015         return 0;
2016 }
2017
2018 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2019 {
2020         struct be_adapter *adapter = netdev_priv(netdev);
2021         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2022         u8 spoofchk;
2023         int status;
2024
2025         if (!sriov_enabled(adapter))
2026                 return -EPERM;
2027
2028         if (vf >= adapter->num_vfs)
2029                 return -EINVAL;
2030
2031         if (BEx_chip(adapter))
2032                 return -EOPNOTSUPP;
2033
2034         if (enable == vf_cfg->spoofchk)
2035                 return 0;
2036
2037         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2038
2039         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2040                                        0, spoofchk);
2041         if (status) {
2042                 dev_err(&adapter->pdev->dev,
2043                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2044                 return be_cmd_status(status);
2045         }
2046
2047         vf_cfg->spoofchk = enable;
2048         return 0;
2049 }
2050
2051 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2052                           ulong now)
2053 {
2054         aic->rx_pkts_prev = rx_pkts;
2055         aic->tx_reqs_prev = tx_pkts;
2056         aic->jiffies = now;
2057 }
2058
2059 static int be_get_new_eqd(struct be_eq_obj *eqo)
2060 {
2061         struct be_adapter *adapter = eqo->adapter;
2062         int eqd, start;
2063         struct be_aic_obj *aic;
2064         struct be_rx_obj *rxo;
2065         struct be_tx_obj *txo;
2066         u64 rx_pkts = 0, tx_pkts = 0;
2067         ulong now;
2068         u32 pps, delta;
2069         int i;
2070
2071         aic = &adapter->aic_obj[eqo->idx];
2072         if (!aic->enable) {
2073                 if (aic->jiffies)
2074                         aic->jiffies = 0;
2075                 eqd = aic->et_eqd;
2076                 return eqd;
2077         }
2078
2079         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2080                 do {
2081                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2082                         rx_pkts += rxo->stats.rx_pkts;
2083                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2084         }
2085
2086         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2087                 do {
2088                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2089                         tx_pkts += txo->stats.tx_reqs;
2090                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2091         }
2092
2093         /* Skip, if wrapped around or first calculation */
2094         now = jiffies;
2095         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2096             rx_pkts < aic->rx_pkts_prev ||
2097             tx_pkts < aic->tx_reqs_prev) {
2098                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2099                 return aic->prev_eqd;
2100         }
2101
2102         delta = jiffies_to_msecs(now - aic->jiffies);
2103         if (delta == 0)
2104                 return aic->prev_eqd;
2105
2106         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2107                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2108         eqd = (pps / 15000) << 2;
2109
2110         if (eqd < 8)
2111                 eqd = 0;
2112         eqd = min_t(u32, eqd, aic->max_eqd);
2113         eqd = max_t(u32, eqd, aic->min_eqd);
2114
2115         be_aic_update(aic, rx_pkts, tx_pkts, now);
2116
2117         return eqd;
2118 }
2119
2120 /* For Skyhawk-R only */
2121 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2122 {
2123         struct be_adapter *adapter = eqo->adapter;
2124         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2125         ulong now = jiffies;
2126         int eqd;
2127         u32 mult_enc;
2128
2129         if (!aic->enable)
2130                 return 0;
2131
2132         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2133                 eqd = aic->prev_eqd;
2134         else
2135                 eqd = be_get_new_eqd(eqo);
2136
2137         if (eqd > 100)
2138                 mult_enc = R2I_DLY_ENC_1;
2139         else if (eqd > 60)
2140                 mult_enc = R2I_DLY_ENC_2;
2141         else if (eqd > 20)
2142                 mult_enc = R2I_DLY_ENC_3;
2143         else
2144                 mult_enc = R2I_DLY_ENC_0;
2145
2146         aic->prev_eqd = eqd;
2147
2148         return mult_enc;
2149 }
2150
2151 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2152 {
2153         struct be_set_eqd set_eqd[MAX_EVT_QS];
2154         struct be_aic_obj *aic;
2155         struct be_eq_obj *eqo;
2156         int i, num = 0, eqd;
2157
2158         for_all_evt_queues(adapter, eqo, i) {
2159                 aic = &adapter->aic_obj[eqo->idx];
2160                 eqd = be_get_new_eqd(eqo);
2161                 if (force_update || eqd != aic->prev_eqd) {
2162                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2163                         set_eqd[num].eq_id = eqo->q.id;
2164                         aic->prev_eqd = eqd;
2165                         num++;
2166                 }
2167         }
2168
2169         if (num)
2170                 be_cmd_modify_eqd(adapter, set_eqd, num);
2171 }
2172
2173 static void be_rx_stats_update(struct be_rx_obj *rxo,
2174                                struct be_rx_compl_info *rxcp)
2175 {
2176         struct be_rx_stats *stats = rx_stats(rxo);
2177
2178         u64_stats_update_begin(&stats->sync);
2179         stats->rx_compl++;
2180         stats->rx_bytes += rxcp->pkt_size;
2181         stats->rx_pkts++;
2182         if (rxcp->tunneled)
2183                 stats->rx_vxlan_offload_pkts++;
2184         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2185                 stats->rx_mcast_pkts++;
2186         if (rxcp->err)
2187                 stats->rx_compl_err++;
2188         u64_stats_update_end(&stats->sync);
2189 }
2190
2191 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2192 {
2193         /* L4 checksum is not reliable for non TCP/UDP packets.
2194          * Also ignore ipcksm for ipv6 pkts
2195          */
2196         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2197                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2198 }
2199
2200 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2201 {
2202         struct be_adapter *adapter = rxo->adapter;
2203         struct be_rx_page_info *rx_page_info;
2204         struct be_queue_info *rxq = &rxo->q;
2205         u32 frag_idx = rxq->tail;
2206
2207         rx_page_info = &rxo->page_info_tbl[frag_idx];
2208         BUG_ON(!rx_page_info->page);
2209
2210         if (rx_page_info->last_frag) {
2211                 dma_unmap_page(&adapter->pdev->dev,
2212                                dma_unmap_addr(rx_page_info, bus),
2213                                adapter->big_page_size, DMA_FROM_DEVICE);
2214                 rx_page_info->last_frag = false;
2215         } else {
2216                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2217                                         dma_unmap_addr(rx_page_info, bus),
2218                                         rx_frag_size, DMA_FROM_DEVICE);
2219         }
2220
2221         queue_tail_inc(rxq);
2222         atomic_dec(&rxq->used);
2223         return rx_page_info;
2224 }
2225
2226 /* Throwaway the data in the Rx completion */
2227 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2228                                 struct be_rx_compl_info *rxcp)
2229 {
2230         struct be_rx_page_info *page_info;
2231         u16 i, num_rcvd = rxcp->num_rcvd;
2232
2233         for (i = 0; i < num_rcvd; i++) {
2234                 page_info = get_rx_page_info(rxo);
2235                 put_page(page_info->page);
2236                 memset(page_info, 0, sizeof(*page_info));
2237         }
2238 }
2239
2240 /*
2241  * skb_fill_rx_data forms a complete skb for an ether frame
2242  * indicated by rxcp.
2243  */
2244 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2245                              struct be_rx_compl_info *rxcp)
2246 {
2247         struct be_rx_page_info *page_info;
2248         u16 i, j;
2249         u16 hdr_len, curr_frag_len, remaining;
2250         u8 *start;
2251
2252         page_info = get_rx_page_info(rxo);
2253         start = page_address(page_info->page) + page_info->page_offset;
2254         prefetch(start);
2255
2256         /* Copy data in the first descriptor of this completion */
2257         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2258
2259         skb->len = curr_frag_len;
2260         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2261                 memcpy(skb->data, start, curr_frag_len);
2262                 /* Complete packet has now been moved to data */
2263                 put_page(page_info->page);
2264                 skb->data_len = 0;
2265                 skb->tail += curr_frag_len;
2266         } else {
2267                 hdr_len = ETH_HLEN;
2268                 memcpy(skb->data, start, hdr_len);
2269                 skb_shinfo(skb)->nr_frags = 1;
2270                 skb_frag_set_page(skb, 0, page_info->page);
2271                 skb_shinfo(skb)->frags[0].page_offset =
2272                                         page_info->page_offset + hdr_len;
2273                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2274                                   curr_frag_len - hdr_len);
2275                 skb->data_len = curr_frag_len - hdr_len;
2276                 skb->truesize += rx_frag_size;
2277                 skb->tail += hdr_len;
2278         }
2279         page_info->page = NULL;
2280
2281         if (rxcp->pkt_size <= rx_frag_size) {
2282                 BUG_ON(rxcp->num_rcvd != 1);
2283                 return;
2284         }
2285
2286         /* More frags present for this completion */
2287         remaining = rxcp->pkt_size - curr_frag_len;
2288         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2289                 page_info = get_rx_page_info(rxo);
2290                 curr_frag_len = min(remaining, rx_frag_size);
2291
2292                 /* Coalesce all frags from the same physical page in one slot */
2293                 if (page_info->page_offset == 0) {
2294                         /* Fresh page */
2295                         j++;
2296                         skb_frag_set_page(skb, j, page_info->page);
2297                         skb_shinfo(skb)->frags[j].page_offset =
2298                                                         page_info->page_offset;
2299                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2300                         skb_shinfo(skb)->nr_frags++;
2301                 } else {
2302                         put_page(page_info->page);
2303                 }
2304
2305                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2306                 skb->len += curr_frag_len;
2307                 skb->data_len += curr_frag_len;
2308                 skb->truesize += rx_frag_size;
2309                 remaining -= curr_frag_len;
2310                 page_info->page = NULL;
2311         }
2312         BUG_ON(j > MAX_SKB_FRAGS);
2313 }
2314
2315 /* Process the RX completion indicated by rxcp when GRO is disabled */
2316 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2317                                 struct be_rx_compl_info *rxcp)
2318 {
2319         struct be_adapter *adapter = rxo->adapter;
2320         struct net_device *netdev = adapter->netdev;
2321         struct sk_buff *skb;
2322
2323         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2324         if (unlikely(!skb)) {
2325                 rx_stats(rxo)->rx_drops_no_skbs++;
2326                 be_rx_compl_discard(rxo, rxcp);
2327                 return;
2328         }
2329
2330         skb_fill_rx_data(rxo, skb, rxcp);
2331
2332         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2333                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2334         else
2335                 skb_checksum_none_assert(skb);
2336
2337         skb->protocol = eth_type_trans(skb, netdev);
2338         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2339         if (netdev->features & NETIF_F_RXHASH)
2340                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2341
2342         skb->csum_level = rxcp->tunneled;
2343         skb_mark_napi_id(skb, napi);
2344
2345         if (rxcp->vlanf)
2346                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2347
2348         netif_receive_skb(skb);
2349 }
2350
2351 /* Process the RX completion indicated by rxcp when GRO is enabled */
2352 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2353                                     struct napi_struct *napi,
2354                                     struct be_rx_compl_info *rxcp)
2355 {
2356         struct be_adapter *adapter = rxo->adapter;
2357         struct be_rx_page_info *page_info;
2358         struct sk_buff *skb = NULL;
2359         u16 remaining, curr_frag_len;
2360         u16 i, j;
2361
2362         skb = napi_get_frags(napi);
2363         if (!skb) {
2364                 be_rx_compl_discard(rxo, rxcp);
2365                 return;
2366         }
2367
2368         remaining = rxcp->pkt_size;
2369         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2370                 page_info = get_rx_page_info(rxo);
2371
2372                 curr_frag_len = min(remaining, rx_frag_size);
2373
2374                 /* Coalesce all frags from the same physical page in one slot */
2375                 if (i == 0 || page_info->page_offset == 0) {
2376                         /* First frag or Fresh page */
2377                         j++;
2378                         skb_frag_set_page(skb, j, page_info->page);
2379                         skb_shinfo(skb)->frags[j].page_offset =
2380                                                         page_info->page_offset;
2381                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2382                 } else {
2383                         put_page(page_info->page);
2384                 }
2385                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2386                 skb->truesize += rx_frag_size;
2387                 remaining -= curr_frag_len;
2388                 memset(page_info, 0, sizeof(*page_info));
2389         }
2390         BUG_ON(j > MAX_SKB_FRAGS);
2391
2392         skb_shinfo(skb)->nr_frags = j + 1;
2393         skb->len = rxcp->pkt_size;
2394         skb->data_len = rxcp->pkt_size;
2395         skb->ip_summed = CHECKSUM_UNNECESSARY;
2396         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2397         if (adapter->netdev->features & NETIF_F_RXHASH)
2398                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2399
2400         skb->csum_level = rxcp->tunneled;
2401
2402         if (rxcp->vlanf)
2403                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2404
2405         napi_gro_frags(napi);
2406 }
2407
2408 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2409                                  struct be_rx_compl_info *rxcp)
2410 {
2411         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2412         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2413         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2414         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2415         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2416         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2417         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2418         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2419         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2420         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2421         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2422         if (rxcp->vlanf) {
2423                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2424                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2425         }
2426         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2427         rxcp->tunneled =
2428                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2429 }
2430
2431 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2432                                  struct be_rx_compl_info *rxcp)
2433 {
2434         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2435         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2436         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2437         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2438         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2439         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2440         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2441         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2442         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2443         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2444         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2445         if (rxcp->vlanf) {
2446                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2447                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2448         }
2449         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2450         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2451 }
2452
2453 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2454 {
2455         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2456         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2457         struct be_adapter *adapter = rxo->adapter;
2458
2459         /* For checking the valid bit it is Ok to use either definition as the
2460          * valid bit is at the same position in both v0 and v1 Rx compl */
2461         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2462                 return NULL;
2463
2464         rmb();
2465         be_dws_le_to_cpu(compl, sizeof(*compl));
2466
2467         if (adapter->be3_native)
2468                 be_parse_rx_compl_v1(compl, rxcp);
2469         else
2470                 be_parse_rx_compl_v0(compl, rxcp);
2471
2472         if (rxcp->ip_frag)
2473                 rxcp->l4_csum = 0;
2474
2475         if (rxcp->vlanf) {
2476                 /* In QNQ modes, if qnq bit is not set, then the packet was
2477                  * tagged only with the transparent outer vlan-tag and must
2478                  * not be treated as a vlan packet by host
2479                  */
2480                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2481                         rxcp->vlanf = 0;
2482
2483                 if (!lancer_chip(adapter))
2484                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2485
2486                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2487                     !test_bit(rxcp->vlan_tag, adapter->vids))
2488                         rxcp->vlanf = 0;
2489         }
2490
2491         /* As the compl has been parsed, reset it; we wont touch it again */
2492         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2493
2494         queue_tail_inc(&rxo->cq);
2495         return rxcp;
2496 }
2497
2498 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2499 {
2500         u32 order = get_order(size);
2501
2502         if (order > 0)
2503                 gfp |= __GFP_COMP;
2504         return  alloc_pages(gfp, order);
2505 }
2506
2507 /*
2508  * Allocate a page, split it to fragments of size rx_frag_size and post as
2509  * receive buffers to BE
2510  */
2511 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2512 {
2513         struct be_adapter *adapter = rxo->adapter;
2514         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2515         struct be_queue_info *rxq = &rxo->q;
2516         struct page *pagep = NULL;
2517         struct device *dev = &adapter->pdev->dev;
2518         struct be_eth_rx_d *rxd;
2519         u64 page_dmaaddr = 0, frag_dmaaddr;
2520         u32 posted, page_offset = 0, notify = 0;
2521
2522         page_info = &rxo->page_info_tbl[rxq->head];
2523         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2524                 if (!pagep) {
2525                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2526                         if (unlikely(!pagep)) {
2527                                 rx_stats(rxo)->rx_post_fail++;
2528                                 break;
2529                         }
2530                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2531                                                     adapter->big_page_size,
2532                                                     DMA_FROM_DEVICE);
2533                         if (dma_mapping_error(dev, page_dmaaddr)) {
2534                                 put_page(pagep);
2535                                 pagep = NULL;
2536                                 adapter->drv_stats.dma_map_errors++;
2537                                 break;
2538                         }
2539                         page_offset = 0;
2540                 } else {
2541                         get_page(pagep);
2542                         page_offset += rx_frag_size;
2543                 }
2544                 page_info->page_offset = page_offset;
2545                 page_info->page = pagep;
2546
2547                 rxd = queue_head_node(rxq);
2548                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2549                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2550                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2551
2552                 /* Any space left in the current big page for another frag? */
2553                 if ((page_offset + rx_frag_size + rx_frag_size) >
2554                                         adapter->big_page_size) {
2555                         pagep = NULL;
2556                         page_info->last_frag = true;
2557                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2558                 } else {
2559                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2560                 }
2561
2562                 prev_page_info = page_info;
2563                 queue_head_inc(rxq);
2564                 page_info = &rxo->page_info_tbl[rxq->head];
2565         }
2566
2567         /* Mark the last frag of a page when we break out of the above loop
2568          * with no more slots available in the RXQ
2569          */
2570         if (pagep) {
2571                 prev_page_info->last_frag = true;
2572                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2573         }
2574
2575         if (posted) {
2576                 atomic_add(posted, &rxq->used);
2577                 if (rxo->rx_post_starved)
2578                         rxo->rx_post_starved = false;
2579                 do {
2580                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2581                         be_rxq_notify(adapter, rxq->id, notify);
2582                         posted -= notify;
2583                 } while (posted);
2584         } else if (atomic_read(&rxq->used) == 0) {
2585                 /* Let be_worker replenish when memory is available */
2586                 rxo->rx_post_starved = true;
2587         }
2588 }
2589
2590 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2591 {
2592         struct be_queue_info *tx_cq = &txo->cq;
2593         struct be_tx_compl_info *txcp = &txo->txcp;
2594         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2595
2596         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2597                 return NULL;
2598
2599         /* Ensure load ordering of valid bit dword and other dwords below */
2600         rmb();
2601         be_dws_le_to_cpu(compl, sizeof(*compl));
2602
2603         txcp->status = GET_TX_COMPL_BITS(status, compl);
2604         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2605
2606         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2607         queue_tail_inc(tx_cq);
2608         return txcp;
2609 }
2610
2611 static u16 be_tx_compl_process(struct be_adapter *adapter,
2612                                struct be_tx_obj *txo, u16 last_index)
2613 {
2614         struct sk_buff **sent_skbs = txo->sent_skb_list;
2615         struct be_queue_info *txq = &txo->q;
2616         struct sk_buff *skb = NULL;
2617         bool unmap_skb_hdr = false;
2618         struct be_eth_wrb *wrb;
2619         u16 num_wrbs = 0;
2620         u32 frag_index;
2621
2622         do {
2623                 if (sent_skbs[txq->tail]) {
2624                         /* Free skb from prev req */
2625                         if (skb)
2626                                 dev_consume_skb_any(skb);
2627                         skb = sent_skbs[txq->tail];
2628                         sent_skbs[txq->tail] = NULL;
2629                         queue_tail_inc(txq);  /* skip hdr wrb */
2630                         num_wrbs++;
2631                         unmap_skb_hdr = true;
2632                 }
2633                 wrb = queue_tail_node(txq);
2634                 frag_index = txq->tail;
2635                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2636                               (unmap_skb_hdr && skb_headlen(skb)));
2637                 unmap_skb_hdr = false;
2638                 queue_tail_inc(txq);
2639                 num_wrbs++;
2640         } while (frag_index != last_index);
2641         dev_consume_skb_any(skb);
2642
2643         return num_wrbs;
2644 }
2645
2646 /* Return the number of events in the event queue */
2647 static inline int events_get(struct be_eq_obj *eqo)
2648 {
2649         struct be_eq_entry *eqe;
2650         int num = 0;
2651
2652         do {
2653                 eqe = queue_tail_node(&eqo->q);
2654                 if (eqe->evt == 0)
2655                         break;
2656
2657                 rmb();
2658                 eqe->evt = 0;
2659                 num++;
2660                 queue_tail_inc(&eqo->q);
2661         } while (true);
2662
2663         return num;
2664 }
2665
2666 /* Leaves the EQ is disarmed state */
2667 static void be_eq_clean(struct be_eq_obj *eqo)
2668 {
2669         int num = events_get(eqo);
2670
2671         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2672 }
2673
2674 /* Free posted rx buffers that were not used */
2675 static void be_rxq_clean(struct be_rx_obj *rxo)
2676 {
2677         struct be_queue_info *rxq = &rxo->q;
2678         struct be_rx_page_info *page_info;
2679
2680         while (atomic_read(&rxq->used) > 0) {
2681                 page_info = get_rx_page_info(rxo);
2682                 put_page(page_info->page);
2683                 memset(page_info, 0, sizeof(*page_info));
2684         }
2685         BUG_ON(atomic_read(&rxq->used));
2686         rxq->tail = 0;
2687         rxq->head = 0;
2688 }
2689
2690 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2691 {
2692         struct be_queue_info *rx_cq = &rxo->cq;
2693         struct be_rx_compl_info *rxcp;
2694         struct be_adapter *adapter = rxo->adapter;
2695         int flush_wait = 0;
2696
2697         /* Consume pending rx completions.
2698          * Wait for the flush completion (identified by zero num_rcvd)
2699          * to arrive. Notify CQ even when there are no more CQ entries
2700          * for HW to flush partially coalesced CQ entries.
2701          * In Lancer, there is no need to wait for flush compl.
2702          */
2703         for (;;) {
2704                 rxcp = be_rx_compl_get(rxo);
2705                 if (!rxcp) {
2706                         if (lancer_chip(adapter))
2707                                 break;
2708
2709                         if (flush_wait++ > 50 ||
2710                             be_check_error(adapter,
2711                                            BE_ERROR_HW)) {
2712                                 dev_warn(&adapter->pdev->dev,
2713                                          "did not receive flush compl\n");
2714                                 break;
2715                         }
2716                         be_cq_notify(adapter, rx_cq->id, true, 0);
2717                         mdelay(1);
2718                 } else {
2719                         be_rx_compl_discard(rxo, rxcp);
2720                         be_cq_notify(adapter, rx_cq->id, false, 1);
2721                         if (rxcp->num_rcvd == 0)
2722                                 break;
2723                 }
2724         }
2725
2726         /* After cleanup, leave the CQ in unarmed state */
2727         be_cq_notify(adapter, rx_cq->id, false, 0);
2728 }
2729
2730 static void be_tx_compl_clean(struct be_adapter *adapter)
2731 {
2732         struct device *dev = &adapter->pdev->dev;
2733         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2734         struct be_tx_compl_info *txcp;
2735         struct be_queue_info *txq;
2736         u32 end_idx, notified_idx;
2737         struct be_tx_obj *txo;
2738         int i, pending_txqs;
2739
2740         /* Stop polling for compls when HW has been silent for 10ms */
2741         do {
2742                 pending_txqs = adapter->num_tx_qs;
2743
2744                 for_all_tx_queues(adapter, txo, i) {
2745                         cmpl = 0;
2746                         num_wrbs = 0;
2747                         txq = &txo->q;
2748                         while ((txcp = be_tx_compl_get(txo))) {
2749                                 num_wrbs +=
2750                                         be_tx_compl_process(adapter, txo,
2751                                                             txcp->end_index);
2752                                 cmpl++;
2753                         }
2754                         if (cmpl) {
2755                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2756                                 atomic_sub(num_wrbs, &txq->used);
2757                                 timeo = 0;
2758                         }
2759                         if (!be_is_tx_compl_pending(txo))
2760                                 pending_txqs--;
2761                 }
2762
2763                 if (pending_txqs == 0 || ++timeo > 10 ||
2764                     be_check_error(adapter, BE_ERROR_HW))
2765                         break;
2766
2767                 mdelay(1);
2768         } while (true);
2769
2770         /* Free enqueued TX that was never notified to HW */
2771         for_all_tx_queues(adapter, txo, i) {
2772                 txq = &txo->q;
2773
2774                 if (atomic_read(&txq->used)) {
2775                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2776                                  i, atomic_read(&txq->used));
2777                         notified_idx = txq->tail;
2778                         end_idx = txq->tail;
2779                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2780                                   txq->len);
2781                         /* Use the tx-compl process logic to handle requests
2782                          * that were not sent to the HW.
2783                          */
2784                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2785                         atomic_sub(num_wrbs, &txq->used);
2786                         BUG_ON(atomic_read(&txq->used));
2787                         txo->pend_wrb_cnt = 0;
2788                         /* Since hw was never notified of these requests,
2789                          * reset TXQ indices
2790                          */
2791                         txq->head = notified_idx;
2792                         txq->tail = notified_idx;
2793                 }
2794         }
2795 }
2796
2797 static void be_evt_queues_destroy(struct be_adapter *adapter)
2798 {
2799         struct be_eq_obj *eqo;
2800         int i;
2801
2802         for_all_evt_queues(adapter, eqo, i) {
2803                 if (eqo->q.created) {
2804                         be_eq_clean(eqo);
2805                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2806                         napi_hash_del(&eqo->napi);
2807                         netif_napi_del(&eqo->napi);
2808                         free_cpumask_var(eqo->affinity_mask);
2809                 }
2810                 be_queue_free(adapter, &eqo->q);
2811         }
2812 }
2813
2814 static int be_evt_queues_create(struct be_adapter *adapter)
2815 {
2816         struct be_queue_info *eq;
2817         struct be_eq_obj *eqo;
2818         struct be_aic_obj *aic;
2819         int i, rc;
2820
2821         /* need enough EQs to service both RX and TX queues */
2822         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2823                                     max(adapter->cfg_num_rx_irqs,
2824                                         adapter->cfg_num_tx_irqs));
2825
2826         for_all_evt_queues(adapter, eqo, i) {
2827                 int numa_node = dev_to_node(&adapter->pdev->dev);
2828
2829                 aic = &adapter->aic_obj[i];
2830                 eqo->adapter = adapter;
2831                 eqo->idx = i;
2832                 aic->max_eqd = BE_MAX_EQD;
2833                 aic->enable = true;
2834
2835                 eq = &eqo->q;
2836                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2837                                     sizeof(struct be_eq_entry));
2838                 if (rc)
2839                         return rc;
2840
2841                 rc = be_cmd_eq_create(adapter, eqo);
2842                 if (rc)
2843                         return rc;
2844
2845                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2846                         return -ENOMEM;
2847                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2848                                 eqo->affinity_mask);
2849                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2850                                BE_NAPI_WEIGHT);
2851         }
2852         return 0;
2853 }
2854
2855 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2856 {
2857         struct be_queue_info *q;
2858
2859         q = &adapter->mcc_obj.q;
2860         if (q->created)
2861                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2862         be_queue_free(adapter, q);
2863
2864         q = &adapter->mcc_obj.cq;
2865         if (q->created)
2866                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2867         be_queue_free(adapter, q);
2868 }
2869
2870 /* Must be called only after TX qs are created as MCC shares TX EQ */
2871 static int be_mcc_queues_create(struct be_adapter *adapter)
2872 {
2873         struct be_queue_info *q, *cq;
2874
2875         cq = &adapter->mcc_obj.cq;
2876         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2877                            sizeof(struct be_mcc_compl)))
2878                 goto err;
2879
2880         /* Use the default EQ for MCC completions */
2881         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2882                 goto mcc_cq_free;
2883
2884         q = &adapter->mcc_obj.q;
2885         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2886                 goto mcc_cq_destroy;
2887
2888         if (be_cmd_mccq_create(adapter, q, cq))
2889                 goto mcc_q_free;
2890
2891         return 0;
2892
2893 mcc_q_free:
2894         be_queue_free(adapter, q);
2895 mcc_cq_destroy:
2896         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2897 mcc_cq_free:
2898         be_queue_free(adapter, cq);
2899 err:
2900         return -1;
2901 }
2902
2903 static void be_tx_queues_destroy(struct be_adapter *adapter)
2904 {
2905         struct be_queue_info *q;
2906         struct be_tx_obj *txo;
2907         u8 i;
2908
2909         for_all_tx_queues(adapter, txo, i) {
2910                 q = &txo->q;
2911                 if (q->created)
2912                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2913                 be_queue_free(adapter, q);
2914
2915                 q = &txo->cq;
2916                 if (q->created)
2917                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2918                 be_queue_free(adapter, q);
2919         }
2920 }
2921
2922 static int be_tx_qs_create(struct be_adapter *adapter)
2923 {
2924         struct be_queue_info *cq;
2925         struct be_tx_obj *txo;
2926         struct be_eq_obj *eqo;
2927         int status, i;
2928
2929         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2930
2931         for_all_tx_queues(adapter, txo, i) {
2932                 cq = &txo->cq;
2933                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2934                                         sizeof(struct be_eth_tx_compl));
2935                 if (status)
2936                         return status;
2937
2938                 u64_stats_init(&txo->stats.sync);
2939                 u64_stats_init(&txo->stats.sync_compl);
2940
2941                 /* If num_evt_qs is less than num_tx_qs, then more than
2942                  * one txq share an eq
2943                  */
2944                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2945                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2946                 if (status)
2947                         return status;
2948
2949                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2950                                         sizeof(struct be_eth_wrb));
2951                 if (status)
2952                         return status;
2953
2954                 status = be_cmd_txq_create(adapter, txo);
2955                 if (status)
2956                         return status;
2957
2958                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2959                                     eqo->idx);
2960         }
2961
2962         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2963                  adapter->num_tx_qs);
2964         return 0;
2965 }
2966
2967 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2968 {
2969         struct be_queue_info *q;
2970         struct be_rx_obj *rxo;
2971         int i;
2972
2973         for_all_rx_queues(adapter, rxo, i) {
2974                 q = &rxo->cq;
2975                 if (q->created)
2976                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2977                 be_queue_free(adapter, q);
2978         }
2979 }
2980
2981 static int be_rx_cqs_create(struct be_adapter *adapter)
2982 {
2983         struct be_queue_info *eq, *cq;
2984         struct be_rx_obj *rxo;
2985         int rc, i;
2986
2987         adapter->num_rss_qs =
2988                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2989
2990         /* We'll use RSS only if atleast 2 RSS rings are supported. */
2991         if (adapter->num_rss_qs < 2)
2992                 adapter->num_rss_qs = 0;
2993
2994         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2995
2996         /* When the interface is not capable of RSS rings (and there is no
2997          * need to create a default RXQ) we'll still need one RXQ
2998          */
2999         if (adapter->num_rx_qs == 0)
3000                 adapter->num_rx_qs = 1;
3001
3002         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3003         for_all_rx_queues(adapter, rxo, i) {
3004                 rxo->adapter = adapter;
3005                 cq = &rxo->cq;
3006                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3007                                     sizeof(struct be_eth_rx_compl));
3008                 if (rc)
3009                         return rc;
3010
3011                 u64_stats_init(&rxo->stats.sync);
3012                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3013                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3014                 if (rc)
3015                         return rc;
3016         }
3017
3018         dev_info(&adapter->pdev->dev,
3019                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3020         return 0;
3021 }
3022
3023 static irqreturn_t be_intx(int irq, void *dev)
3024 {
3025         struct be_eq_obj *eqo = dev;
3026         struct be_adapter *adapter = eqo->adapter;
3027         int num_evts = 0;
3028
3029         /* IRQ is not expected when NAPI is scheduled as the EQ
3030          * will not be armed.
3031          * But, this can happen on Lancer INTx where it takes
3032          * a while to de-assert INTx or in BE2 where occasionaly
3033          * an interrupt may be raised even when EQ is unarmed.
3034          * If NAPI is already scheduled, then counting & notifying
3035          * events will orphan them.
3036          */
3037         if (napi_schedule_prep(&eqo->napi)) {
3038                 num_evts = events_get(eqo);
3039                 __napi_schedule(&eqo->napi);
3040                 if (num_evts)
3041                         eqo->spurious_intr = 0;
3042         }
3043         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3044
3045         /* Return IRQ_HANDLED only for the the first spurious intr
3046          * after a valid intr to stop the kernel from branding
3047          * this irq as a bad one!
3048          */
3049         if (num_evts || eqo->spurious_intr++ == 0)
3050                 return IRQ_HANDLED;
3051         else
3052                 return IRQ_NONE;
3053 }
3054
3055 static irqreturn_t be_msix(int irq, void *dev)
3056 {
3057         struct be_eq_obj *eqo = dev;
3058
3059         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3060         napi_schedule(&eqo->napi);
3061         return IRQ_HANDLED;
3062 }
3063
3064 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3065 {
3066         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3067 }
3068
3069 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3070                          int budget, int polling)
3071 {
3072         struct be_adapter *adapter = rxo->adapter;
3073         struct be_queue_info *rx_cq = &rxo->cq;
3074         struct be_rx_compl_info *rxcp;
3075         u32 work_done;
3076         u32 frags_consumed = 0;
3077
3078         for (work_done = 0; work_done < budget; work_done++) {
3079                 rxcp = be_rx_compl_get(rxo);
3080                 if (!rxcp)
3081                         break;
3082
3083                 /* Is it a flush compl that has no data */
3084                 if (unlikely(rxcp->num_rcvd == 0))
3085                         goto loop_continue;
3086
3087                 /* Discard compl with partial DMA Lancer B0 */
3088                 if (unlikely(!rxcp->pkt_size)) {
3089                         be_rx_compl_discard(rxo, rxcp);
3090                         goto loop_continue;
3091                 }
3092
3093                 /* On BE drop pkts that arrive due to imperfect filtering in
3094                  * promiscuous mode on some skews
3095                  */
3096                 if (unlikely(rxcp->port != adapter->port_num &&
3097                              !lancer_chip(adapter))) {
3098                         be_rx_compl_discard(rxo, rxcp);
3099                         goto loop_continue;
3100                 }
3101
3102                 /* Don't do gro when we're busy_polling */
3103                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3104                         be_rx_compl_process_gro(rxo, napi, rxcp);
3105                 else
3106                         be_rx_compl_process(rxo, napi, rxcp);
3107
3108 loop_continue:
3109                 frags_consumed += rxcp->num_rcvd;
3110                 be_rx_stats_update(rxo, rxcp);
3111         }
3112
3113         if (work_done) {
3114                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3115
3116                 /* When an rx-obj gets into post_starved state, just
3117                  * let be_worker do the posting.
3118                  */
3119                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3120                     !rxo->rx_post_starved)
3121                         be_post_rx_frags(rxo, GFP_ATOMIC,
3122                                          max_t(u32, MAX_RX_POST,
3123                                                frags_consumed));
3124         }
3125
3126         return work_done;
3127 }
3128
3129 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3130 {
3131         switch (status) {
3132         case BE_TX_COMP_HDR_PARSE_ERR:
3133                 tx_stats(txo)->tx_hdr_parse_err++;
3134                 break;
3135         case BE_TX_COMP_NDMA_ERR:
3136                 tx_stats(txo)->tx_dma_err++;
3137                 break;
3138         case BE_TX_COMP_ACL_ERR:
3139                 tx_stats(txo)->tx_spoof_check_err++;
3140                 break;
3141         }
3142 }
3143
3144 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3145 {
3146         switch (status) {
3147         case LANCER_TX_COMP_LSO_ERR:
3148                 tx_stats(txo)->tx_tso_err++;
3149                 break;
3150         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3151         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3152                 tx_stats(txo)->tx_spoof_check_err++;
3153                 break;
3154         case LANCER_TX_COMP_QINQ_ERR:
3155                 tx_stats(txo)->tx_qinq_err++;
3156                 break;
3157         case LANCER_TX_COMP_PARITY_ERR:
3158                 tx_stats(txo)->tx_internal_parity_err++;
3159                 break;
3160         case LANCER_TX_COMP_DMA_ERR:
3161                 tx_stats(txo)->tx_dma_err++;
3162                 break;
3163         }
3164 }
3165
3166 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3167                           int idx)
3168 {
3169         int num_wrbs = 0, work_done = 0;
3170         struct be_tx_compl_info *txcp;
3171
3172         while ((txcp = be_tx_compl_get(txo))) {
3173                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3174                 work_done++;
3175
3176                 if (txcp->status) {
3177                         if (lancer_chip(adapter))
3178                                 lancer_update_tx_err(txo, txcp->status);
3179                         else
3180                                 be_update_tx_err(txo, txcp->status);
3181                 }
3182         }
3183
3184         if (work_done) {
3185                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3186                 atomic_sub(num_wrbs, &txo->q.used);
3187
3188                 /* As Tx wrbs have been freed up, wake up netdev queue
3189                  * if it was stopped due to lack of tx wrbs.  */
3190                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3191                     be_can_txq_wake(txo)) {
3192                         netif_wake_subqueue(adapter->netdev, idx);
3193                 }
3194
3195                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3196                 tx_stats(txo)->tx_compl += work_done;
3197                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3198         }
3199 }
3200
3201 #ifdef CONFIG_NET_RX_BUSY_POLL
3202 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3203 {
3204         bool status = true;
3205
3206         spin_lock(&eqo->lock); /* BH is already disabled */
3207         if (eqo->state & BE_EQ_LOCKED) {
3208                 WARN_ON(eqo->state & BE_EQ_NAPI);
3209                 eqo->state |= BE_EQ_NAPI_YIELD;
3210                 status = false;
3211         } else {
3212                 eqo->state = BE_EQ_NAPI;
3213         }
3214         spin_unlock(&eqo->lock);
3215         return status;
3216 }
3217
3218 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3219 {
3220         spin_lock(&eqo->lock); /* BH is already disabled */
3221
3222         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3223         eqo->state = BE_EQ_IDLE;
3224
3225         spin_unlock(&eqo->lock);
3226 }
3227
3228 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3229 {
3230         bool status = true;
3231
3232         spin_lock_bh(&eqo->lock);
3233         if (eqo->state & BE_EQ_LOCKED) {
3234                 eqo->state |= BE_EQ_POLL_YIELD;
3235                 status = false;
3236         } else {
3237                 eqo->state |= BE_EQ_POLL;
3238         }
3239         spin_unlock_bh(&eqo->lock);
3240         return status;
3241 }
3242
3243 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3244 {
3245         spin_lock_bh(&eqo->lock);
3246
3247         WARN_ON(eqo->state & (BE_EQ_NAPI));
3248         eqo->state = BE_EQ_IDLE;
3249
3250         spin_unlock_bh(&eqo->lock);
3251 }
3252
3253 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3254 {
3255         spin_lock_init(&eqo->lock);
3256         eqo->state = BE_EQ_IDLE;
3257 }
3258
3259 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3260 {
3261         local_bh_disable();
3262
3263         /* It's enough to just acquire napi lock on the eqo to stop
3264          * be_busy_poll() from processing any queueus.
3265          */
3266         while (!be_lock_napi(eqo))
3267                 mdelay(1);
3268
3269         local_bh_enable();
3270 }
3271
3272 #else /* CONFIG_NET_RX_BUSY_POLL */
3273
3274 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3275 {
3276         return true;
3277 }
3278
3279 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3280 {
3281 }
3282
3283 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3284 {
3285         return false;
3286 }
3287
3288 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3289 {
3290 }
3291
3292 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3293 {
3294 }
3295
3296 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3297 {
3298 }
3299 #endif /* CONFIG_NET_RX_BUSY_POLL */
3300
3301 int be_poll(struct napi_struct *napi, int budget)
3302 {
3303         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3304         struct be_adapter *adapter = eqo->adapter;
3305         int max_work = 0, work, i, num_evts;
3306         struct be_rx_obj *rxo;
3307         struct be_tx_obj *txo;
3308         u32 mult_enc = 0;
3309
3310         num_evts = events_get(eqo);
3311
3312         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3313                 be_process_tx(adapter, txo, i);
3314
3315         if (be_lock_napi(eqo)) {
3316                 /* This loop will iterate twice for EQ0 in which
3317                  * completions of the last RXQ (default one) are also processed
3318                  * For other EQs the loop iterates only once
3319                  */
3320                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3321                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3322                         max_work = max(work, max_work);
3323                 }
3324                 be_unlock_napi(eqo);
3325         } else {
3326                 max_work = budget;
3327         }
3328
3329         if (is_mcc_eqo(eqo))
3330                 be_process_mcc(adapter);
3331
3332         if (max_work < budget) {
3333                 napi_complete(napi);
3334
3335                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3336                  * delay via a delay multiplier encoding value
3337                  */
3338                 if (skyhawk_chip(adapter))
3339                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3340
3341                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3342                              mult_enc);
3343         } else {
3344                 /* As we'll continue in polling mode, count and clear events */
3345                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3346         }
3347         return max_work;
3348 }
3349
3350 #ifdef CONFIG_NET_RX_BUSY_POLL
3351 static int be_busy_poll(struct napi_struct *napi)
3352 {
3353         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3354         struct be_adapter *adapter = eqo->adapter;
3355         struct be_rx_obj *rxo;
3356         int i, work = 0;
3357
3358         if (!be_lock_busy_poll(eqo))
3359                 return LL_FLUSH_BUSY;
3360
3361         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3362                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3363                 if (work)
3364                         break;
3365         }
3366
3367         be_unlock_busy_poll(eqo);
3368         return work;
3369 }
3370 #endif
3371
3372 void be_detect_error(struct be_adapter *adapter)
3373 {
3374         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3375         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3376         u32 i;
3377         struct device *dev = &adapter->pdev->dev;
3378
3379         if (be_check_error(adapter, BE_ERROR_HW))
3380                 return;
3381
3382         if (lancer_chip(adapter)) {
3383                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3384                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3385                         be_set_error(adapter, BE_ERROR_UE);
3386                         sliport_err1 = ioread32(adapter->db +
3387                                                 SLIPORT_ERROR1_OFFSET);
3388                         sliport_err2 = ioread32(adapter->db +
3389                                                 SLIPORT_ERROR2_OFFSET);
3390                         /* Do not log error messages if its a FW reset */
3391                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3392                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3393                                 dev_info(dev, "Firmware update in progress\n");
3394                         } else {
3395                                 dev_err(dev, "Error detected in the card\n");
3396                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3397                                         sliport_status);
3398                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3399                                         sliport_err1);
3400                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3401                                         sliport_err2);
3402                         }
3403                 }
3404         } else {
3405                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3406                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3407                 ue_lo_mask = ioread32(adapter->pcicfg +
3408                                       PCICFG_UE_STATUS_LOW_MASK);
3409                 ue_hi_mask = ioread32(adapter->pcicfg +
3410                                       PCICFG_UE_STATUS_HI_MASK);
3411
3412                 ue_lo = (ue_lo & ~ue_lo_mask);
3413                 ue_hi = (ue_hi & ~ue_hi_mask);
3414
3415                 /* On certain platforms BE hardware can indicate spurious UEs.
3416                  * Allow HW to stop working completely in case of a real UE.
3417                  * Hence not setting the hw_error for UE detection.
3418                  */
3419
3420                 if (ue_lo || ue_hi) {
3421                         dev_err(dev, "Error detected in the adapter");
3422                         if (skyhawk_chip(adapter))
3423                                 be_set_error(adapter, BE_ERROR_UE);
3424
3425                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3426                                 if (ue_lo & 1)
3427                                         dev_err(dev, "UE: %s bit set\n",
3428                                                 ue_status_low_desc[i]);
3429                         }
3430                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3431                                 if (ue_hi & 1)
3432                                         dev_err(dev, "UE: %s bit set\n",
3433                                                 ue_status_hi_desc[i]);
3434                         }
3435                 }
3436         }
3437 }
3438
3439 static void be_msix_disable(struct be_adapter *adapter)
3440 {
3441         if (msix_enabled(adapter)) {
3442                 pci_disable_msix(adapter->pdev);
3443                 adapter->num_msix_vec = 0;
3444                 adapter->num_msix_roce_vec = 0;
3445         }
3446 }
3447
3448 static int be_msix_enable(struct be_adapter *adapter)
3449 {
3450         unsigned int i, max_roce_eqs;
3451         struct device *dev = &adapter->pdev->dev;
3452         int num_vec;
3453
3454         /* If RoCE is supported, program the max number of vectors that
3455          * could be used for NIC and RoCE, else, just program the number
3456          * we'll use initially.
3457          */
3458         if (be_roce_supported(adapter)) {
3459                 max_roce_eqs =
3460                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3461                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3462                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3463         } else {
3464                 num_vec = max(adapter->cfg_num_rx_irqs,
3465                               adapter->cfg_num_tx_irqs);
3466         }
3467
3468         for (i = 0; i < num_vec; i++)
3469                 adapter->msix_entries[i].entry = i;
3470
3471         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3472                                         MIN_MSIX_VECTORS, num_vec);
3473         if (num_vec < 0)
3474                 goto fail;
3475
3476         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3477                 adapter->num_msix_roce_vec = num_vec / 2;
3478                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3479                          adapter->num_msix_roce_vec);
3480         }
3481
3482         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3483
3484         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3485                  adapter->num_msix_vec);
3486         return 0;
3487
3488 fail:
3489         dev_warn(dev, "MSIx enable failed\n");
3490
3491         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3492         if (be_virtfn(adapter))
3493                 return num_vec;
3494         return 0;
3495 }
3496
3497 static inline int be_msix_vec_get(struct be_adapter *adapter,
3498                                   struct be_eq_obj *eqo)
3499 {
3500         return adapter->msix_entries[eqo->msix_idx].vector;
3501 }
3502
3503 static int be_msix_register(struct be_adapter *adapter)
3504 {
3505         struct net_device *netdev = adapter->netdev;
3506         struct be_eq_obj *eqo;
3507         int status, i, vec;
3508
3509         for_all_evt_queues(adapter, eqo, i) {
3510                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3511                 vec = be_msix_vec_get(adapter, eqo);
3512                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3513                 if (status)
3514                         goto err_msix;
3515
3516                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3517         }
3518
3519         return 0;
3520 err_msix:
3521         for (i--; i >= 0; i--) {
3522                 eqo = &adapter->eq_obj[i];
3523                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3524         }
3525         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3526                  status);
3527         be_msix_disable(adapter);
3528         return status;
3529 }
3530
3531 static int be_irq_register(struct be_adapter *adapter)
3532 {
3533         struct net_device *netdev = adapter->netdev;
3534         int status;
3535
3536         if (msix_enabled(adapter)) {
3537                 status = be_msix_register(adapter);
3538                 if (status == 0)
3539                         goto done;
3540                 /* INTx is not supported for VF */
3541                 if (be_virtfn(adapter))
3542                         return status;
3543         }
3544
3545         /* INTx: only the first EQ is used */
3546         netdev->irq = adapter->pdev->irq;
3547         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3548                              &adapter->eq_obj[0]);
3549         if (status) {
3550                 dev_err(&adapter->pdev->dev,
3551                         "INTx request IRQ failed - err %d\n", status);
3552                 return status;
3553         }
3554 done:
3555         adapter->isr_registered = true;
3556         return 0;
3557 }
3558
3559 static void be_irq_unregister(struct be_adapter *adapter)
3560 {
3561         struct net_device *netdev = adapter->netdev;
3562         struct be_eq_obj *eqo;
3563         int i, vec;
3564
3565         if (!adapter->isr_registered)
3566                 return;
3567
3568         /* INTx */
3569         if (!msix_enabled(adapter)) {
3570                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3571                 goto done;
3572         }
3573
3574         /* MSIx */
3575         for_all_evt_queues(adapter, eqo, i) {
3576                 vec = be_msix_vec_get(adapter, eqo);
3577                 irq_set_affinity_hint(vec, NULL);
3578                 free_irq(vec, eqo);
3579         }
3580
3581 done:
3582         adapter->isr_registered = false;
3583 }
3584
3585 static void be_rx_qs_destroy(struct be_adapter *adapter)
3586 {
3587         struct rss_info *rss = &adapter->rss_info;
3588         struct be_queue_info *q;
3589         struct be_rx_obj *rxo;
3590         int i;
3591
3592         for_all_rx_queues(adapter, rxo, i) {
3593                 q = &rxo->q;
3594                 if (q->created) {
3595                         /* If RXQs are destroyed while in an "out of buffer"
3596                          * state, there is a possibility of an HW stall on
3597                          * Lancer. So, post 64 buffers to each queue to relieve
3598                          * the "out of buffer" condition.
3599                          * Make sure there's space in the RXQ before posting.
3600                          */
3601                         if (lancer_chip(adapter)) {
3602                                 be_rx_cq_clean(rxo);
3603                                 if (atomic_read(&q->used) == 0)
3604                                         be_post_rx_frags(rxo, GFP_KERNEL,
3605                                                          MAX_RX_POST);
3606                         }
3607
3608                         be_cmd_rxq_destroy(adapter, q);
3609                         be_rx_cq_clean(rxo);
3610                         be_rxq_clean(rxo);
3611                 }
3612                 be_queue_free(adapter, q);
3613         }
3614
3615         if (rss->rss_flags) {
3616                 rss->rss_flags = RSS_ENABLE_NONE;
3617                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3618                                   128, rss->rss_hkey);
3619         }
3620 }
3621
3622 static void be_disable_if_filters(struct be_adapter *adapter)
3623 {
3624         be_dev_mac_del(adapter, adapter->pmac_id[0]);
3625         be_clear_uc_list(adapter);
3626         be_clear_mc_list(adapter);
3627
3628         /* The IFACE flags are enabled in the open path and cleared
3629          * in the close path. When a VF gets detached from the host and
3630          * assigned to a VM the following happens:
3631          *      - VF's IFACE flags get cleared in the detach path
3632          *      - IFACE create is issued by the VF in the attach path
3633          * Due to a bug in the BE3/Skyhawk-R FW
3634          * (Lancer FW doesn't have the bug), the IFACE capability flags
3635          * specified along with the IFACE create cmd issued by a VF are not
3636          * honoured by FW.  As a consequence, if a *new* driver
3637          * (that enables/disables IFACE flags in open/close)
3638          * is loaded in the host and an *old* driver is * used by a VM/VF,
3639          * the IFACE gets created *without* the needed flags.
3640          * To avoid this, disable RX-filter flags only for Lancer.
3641          */
3642         if (lancer_chip(adapter)) {
3643                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3644                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3645         }
3646 }
3647
3648 static int be_close(struct net_device *netdev)
3649 {
3650         struct be_adapter *adapter = netdev_priv(netdev);
3651         struct be_eq_obj *eqo;
3652         int i;
3653
3654         /* This protection is needed as be_close() may be called even when the
3655          * adapter is in cleared state (after eeh perm failure)
3656          */
3657         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3658                 return 0;
3659
3660         /* Before attempting cleanup ensure all the pending cmds in the
3661          * config_wq have finished execution
3662          */
3663         flush_workqueue(be_wq);
3664
3665         be_disable_if_filters(adapter);
3666
3667         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3668                 for_all_evt_queues(adapter, eqo, i) {
3669                         napi_disable(&eqo->napi);
3670                         be_disable_busy_poll(eqo);
3671                 }
3672                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3673         }
3674
3675         be_async_mcc_disable(adapter);
3676
3677         /* Wait for all pending tx completions to arrive so that
3678          * all tx skbs are freed.
3679          */
3680         netif_tx_disable(netdev);
3681         be_tx_compl_clean(adapter);
3682
3683         be_rx_qs_destroy(adapter);
3684
3685         for_all_evt_queues(adapter, eqo, i) {
3686                 if (msix_enabled(adapter))
3687                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3688                 else
3689                         synchronize_irq(netdev->irq);
3690                 be_eq_clean(eqo);
3691         }
3692
3693         be_irq_unregister(adapter);
3694
3695         return 0;
3696 }
3697
3698 static int be_rx_qs_create(struct be_adapter *adapter)
3699 {
3700         struct rss_info *rss = &adapter->rss_info;
3701         u8 rss_key[RSS_HASH_KEY_LEN];
3702         struct be_rx_obj *rxo;
3703         int rc, i, j;
3704
3705         for_all_rx_queues(adapter, rxo, i) {
3706                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3707                                     sizeof(struct be_eth_rx_d));
3708                 if (rc)
3709                         return rc;
3710         }
3711
3712         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3713                 rxo = default_rxo(adapter);
3714                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3715                                        rx_frag_size, adapter->if_handle,
3716                                        false, &rxo->rss_id);
3717                 if (rc)
3718                         return rc;
3719         }
3720
3721         for_all_rss_queues(adapter, rxo, i) {
3722                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3723                                        rx_frag_size, adapter->if_handle,
3724                                        true, &rxo->rss_id);
3725                 if (rc)
3726                         return rc;
3727         }
3728
3729         if (be_multi_rxq(adapter)) {
3730                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3731                         for_all_rss_queues(adapter, rxo, i) {
3732                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3733                                         break;
3734                                 rss->rsstable[j + i] = rxo->rss_id;
3735                                 rss->rss_queue[j + i] = i;
3736                         }
3737                 }
3738                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3739                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3740
3741                 if (!BEx_chip(adapter))
3742                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3743                                 RSS_ENABLE_UDP_IPV6;
3744
3745                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3746                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3747                                        RSS_INDIR_TABLE_LEN, rss_key);
3748                 if (rc) {
3749                         rss->rss_flags = RSS_ENABLE_NONE;
3750                         return rc;
3751                 }
3752
3753                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3754         } else {
3755                 /* Disable RSS, if only default RX Q is created */
3756                 rss->rss_flags = RSS_ENABLE_NONE;
3757         }
3758
3759
3760         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3761          * which is a queue empty condition
3762          */
3763         for_all_rx_queues(adapter, rxo, i)
3764                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3765
3766         return 0;
3767 }
3768
3769 static int be_enable_if_filters(struct be_adapter *adapter)
3770 {
3771         int status;
3772
3773         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3774         if (status)
3775                 return status;
3776
3777         /* For BE3 VFs, the PF programs the initial MAC address */
3778         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3779                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3780                 if (status)
3781                         return status;
3782                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783         }
3784
3785         if (adapter->vlans_added)
3786                 be_vid_config(adapter);
3787
3788         __be_set_rx_mode(adapter);
3789
3790         return 0;
3791 }
3792
3793 static int be_open(struct net_device *netdev)
3794 {
3795         struct be_adapter *adapter = netdev_priv(netdev);
3796         struct be_eq_obj *eqo;
3797         struct be_rx_obj *rxo;
3798         struct be_tx_obj *txo;
3799         u8 link_status;
3800         int status, i;
3801
3802         status = be_rx_qs_create(adapter);
3803         if (status)
3804                 goto err;
3805
3806         status = be_enable_if_filters(adapter);
3807         if (status)
3808                 goto err;
3809
3810         status = be_irq_register(adapter);
3811         if (status)
3812                 goto err;
3813
3814         for_all_rx_queues(adapter, rxo, i)
3815                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3816
3817         for_all_tx_queues(adapter, txo, i)
3818                 be_cq_notify(adapter, txo->cq.id, true, 0);
3819
3820         be_async_mcc_enable(adapter);
3821
3822         for_all_evt_queues(adapter, eqo, i) {
3823                 napi_enable(&eqo->napi);
3824                 be_enable_busy_poll(eqo);
3825                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3826         }
3827         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3828
3829         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3830         if (!status)
3831                 be_link_status_update(adapter, link_status);
3832
3833         netif_tx_start_all_queues(netdev);
3834         if (skyhawk_chip(adapter))
3835                 udp_tunnel_get_rx_info(netdev);
3836
3837         return 0;
3838 err:
3839         be_close(adapter->netdev);
3840         return -EIO;
3841 }
3842
3843 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3844 {
3845         u32 addr;
3846
3847         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3848
3849         mac[5] = (u8)(addr & 0xFF);
3850         mac[4] = (u8)((addr >> 8) & 0xFF);
3851         mac[3] = (u8)((addr >> 16) & 0xFF);
3852         /* Use the OUI from the current MAC address */
3853         memcpy(mac, adapter->netdev->dev_addr, 3);
3854 }
3855
3856 /*
3857  * Generate a seed MAC address from the PF MAC Address using jhash.
3858  * MAC Address for VFs are assigned incrementally starting from the seed.
3859  * These addresses are programmed in the ASIC by the PF and the VF driver
3860  * queries for the MAC address during its probe.
3861  */
3862 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3863 {
3864         u32 vf;
3865         int status = 0;
3866         u8 mac[ETH_ALEN];
3867         struct be_vf_cfg *vf_cfg;
3868
3869         be_vf_eth_addr_generate(adapter, mac);
3870
3871         for_all_vfs(adapter, vf_cfg, vf) {
3872                 if (BEx_chip(adapter))
3873                         status = be_cmd_pmac_add(adapter, mac,
3874                                                  vf_cfg->if_handle,
3875                                                  &vf_cfg->pmac_id, vf + 1);
3876                 else
3877                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3878                                                 vf + 1);
3879
3880                 if (status)
3881                         dev_err(&adapter->pdev->dev,
3882                                 "Mac address assignment failed for VF %d\n",
3883                                 vf);
3884                 else
3885                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3886
3887                 mac[5] += 1;
3888         }
3889         return status;
3890 }
3891
3892 static int be_vfs_mac_query(struct be_adapter *adapter)
3893 {
3894         int status, vf;
3895         u8 mac[ETH_ALEN];
3896         struct be_vf_cfg *vf_cfg;
3897
3898         for_all_vfs(adapter, vf_cfg, vf) {
3899                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3900                                                mac, vf_cfg->if_handle,
3901                                                false, vf+1);
3902                 if (status)
3903                         return status;
3904                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3905         }
3906         return 0;
3907 }
3908
3909 static void be_vf_clear(struct be_adapter *adapter)
3910 {
3911         struct be_vf_cfg *vf_cfg;
3912         u32 vf;
3913
3914         if (pci_vfs_assigned(adapter->pdev)) {
3915                 dev_warn(&adapter->pdev->dev,
3916                          "VFs are assigned to VMs: not disabling VFs\n");
3917                 goto done;
3918         }
3919
3920         pci_disable_sriov(adapter->pdev);
3921
3922         for_all_vfs(adapter, vf_cfg, vf) {
3923                 if (BEx_chip(adapter))
3924                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3925                                         vf_cfg->pmac_id, vf + 1);
3926                 else
3927                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3928                                        vf + 1);
3929
3930                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3931         }
3932
3933         if (BE3_chip(adapter))
3934                 be_cmd_set_hsw_config(adapter, 0, 0,
3935                                       adapter->if_handle,
3936                                       PORT_FWD_TYPE_PASSTHRU, 0);
3937 done:
3938         kfree(adapter->vf_cfg);
3939         adapter->num_vfs = 0;
3940         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3941 }
3942
3943 static void be_clear_queues(struct be_adapter *adapter)
3944 {
3945         be_mcc_queues_destroy(adapter);
3946         be_rx_cqs_destroy(adapter);
3947         be_tx_queues_destroy(adapter);
3948         be_evt_queues_destroy(adapter);
3949 }
3950
3951 static void be_cancel_worker(struct be_adapter *adapter)
3952 {
3953         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3954                 cancel_delayed_work_sync(&adapter->work);
3955                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3956         }
3957 }
3958
3959 static void be_cancel_err_detection(struct be_adapter *adapter)
3960 {
3961         struct be_error_recovery *err_rec = &adapter->error_recovery;
3962
3963         if (!be_err_recovery_workq)
3964                 return;
3965
3966         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3967                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3968                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3969         }
3970 }
3971
3972 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3973 {
3974         struct net_device *netdev = adapter->netdev;
3975
3976         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3977                 be_cmd_manage_iface(adapter, adapter->if_handle,
3978                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3979
3980         if (adapter->vxlan_port)
3981                 be_cmd_set_vxlan_port(adapter, 0);
3982
3983         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3984         adapter->vxlan_port = 0;
3985
3986         netdev->hw_enc_features = 0;
3987         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3988         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3989 }
3990
3991 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3992                                 struct be_resources *vft_res)
3993 {
3994         struct be_resources res = adapter->pool_res;
3995         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3996         struct be_resources res_mod = {0};
3997         u16 num_vf_qs = 1;
3998
3999         /* Distribute the queue resources among the PF and it's VFs */
4000         if (num_vfs) {
4001                 /* Divide the rx queues evenly among the VFs and the PF, capped
4002                  * at VF-EQ-count. Any remainder queues belong to the PF.
4003                  */
4004                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4005                                 res.max_rss_qs / (num_vfs + 1));
4006
4007                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4008                  * RSS Tables per port. Provide RSS on VFs, only if number of
4009                  * VFs requested is less than it's PF Pool's RSS Tables limit.
4010                  */
4011                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4012                         num_vf_qs = 1;
4013         }
4014
4015         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4016          * which are modifiable using SET_PROFILE_CONFIG cmd.
4017          */
4018         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4019                                   RESOURCE_MODIFIABLE, 0);
4020
4021         /* If RSS IFACE capability flags are modifiable for a VF, set the
4022          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4023          * more than 1 RSSQ is available for a VF.
4024          * Otherwise, provision only 1 queue pair for VF.
4025          */
4026         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4027                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4028                 if (num_vf_qs > 1) {
4029                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4030                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4031                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4032                 } else {
4033                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4034                                              BE_IF_FLAGS_DEFQ_RSS);
4035                 }
4036         } else {
4037                 num_vf_qs = 1;
4038         }
4039
4040         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4041                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4042                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4043         }
4044
4045         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4046         vft_res->max_rx_qs = num_vf_qs;
4047         vft_res->max_rss_qs = num_vf_qs;
4048         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4049         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4050
4051         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4052          * among the PF and it's VFs, if the fields are changeable
4053          */
4054         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4055                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4056
4057         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4058                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4059
4060         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4061                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4062
4063         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4064                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4065 }
4066
4067 static void be_if_destroy(struct be_adapter *adapter)
4068 {
4069         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4070
4071         kfree(adapter->pmac_id);
4072         adapter->pmac_id = NULL;
4073
4074         kfree(adapter->mc_list);
4075         adapter->mc_list = NULL;
4076
4077         kfree(adapter->uc_list);
4078         adapter->uc_list = NULL;
4079 }
4080
4081 static int be_clear(struct be_adapter *adapter)
4082 {
4083         struct pci_dev *pdev = adapter->pdev;
4084         struct  be_resources vft_res = {0};
4085
4086         be_cancel_worker(adapter);
4087
4088         flush_workqueue(be_wq);
4089
4090         if (sriov_enabled(adapter))
4091                 be_vf_clear(adapter);
4092
4093         /* Re-configure FW to distribute resources evenly across max-supported
4094          * number of VFs, only when VFs are not already enabled.
4095          */
4096         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4097             !pci_vfs_assigned(pdev)) {
4098                 be_calculate_vf_res(adapter,
4099                                     pci_sriov_get_totalvfs(pdev),
4100                                     &vft_res);
4101                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4102                                         pci_sriov_get_totalvfs(pdev),
4103                                         &vft_res);
4104         }
4105
4106         be_disable_vxlan_offloads(adapter);
4107
4108         be_if_destroy(adapter);
4109
4110         be_clear_queues(adapter);
4111
4112         be_msix_disable(adapter);
4113         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4114         return 0;
4115 }
4116
4117 static int be_vfs_if_create(struct be_adapter *adapter)
4118 {
4119         struct be_resources res = {0};
4120         u32 cap_flags, en_flags, vf;
4121         struct be_vf_cfg *vf_cfg;
4122         int status;
4123
4124         /* If a FW profile exists, then cap_flags are updated */
4125         cap_flags = BE_VF_IF_EN_FLAGS;
4126
4127         for_all_vfs(adapter, vf_cfg, vf) {
4128                 if (!BE3_chip(adapter)) {
4129                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4130                                                            ACTIVE_PROFILE_TYPE,
4131                                                            RESOURCE_LIMITS,
4132                                                            vf + 1);
4133                         if (!status) {
4134                                 cap_flags = res.if_cap_flags;
4135                                 /* Prevent VFs from enabling VLAN promiscuous
4136                                  * mode
4137                                  */
4138                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4139                         }
4140                 }
4141
4142                 /* PF should enable IF flags during proxy if_create call */
4143                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4144                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4145                                           &vf_cfg->if_handle, vf + 1);
4146                 if (status)
4147                         return status;
4148         }
4149
4150         return 0;
4151 }
4152
4153 static int be_vf_setup_init(struct be_adapter *adapter)
4154 {
4155         struct be_vf_cfg *vf_cfg;
4156         int vf;
4157
4158         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4159                                   GFP_KERNEL);
4160         if (!adapter->vf_cfg)
4161                 return -ENOMEM;
4162
4163         for_all_vfs(adapter, vf_cfg, vf) {
4164                 vf_cfg->if_handle = -1;
4165                 vf_cfg->pmac_id = -1;
4166         }
4167         return 0;
4168 }
4169
4170 static int be_vf_setup(struct be_adapter *adapter)
4171 {
4172         struct device *dev = &adapter->pdev->dev;
4173         struct be_vf_cfg *vf_cfg;
4174         int status, old_vfs, vf;
4175         bool spoofchk;
4176
4177         old_vfs = pci_num_vf(adapter->pdev);
4178
4179         status = be_vf_setup_init(adapter);
4180         if (status)
4181                 goto err;
4182
4183         if (old_vfs) {
4184                 for_all_vfs(adapter, vf_cfg, vf) {
4185                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4186                         if (status)
4187                                 goto err;
4188                 }
4189
4190                 status = be_vfs_mac_query(adapter);
4191                 if (status)
4192                         goto err;
4193         } else {
4194                 status = be_vfs_if_create(adapter);
4195                 if (status)
4196                         goto err;
4197
4198                 status = be_vf_eth_addr_config(adapter);
4199                 if (status)
4200                         goto err;
4201         }
4202
4203         for_all_vfs(adapter, vf_cfg, vf) {
4204                 /* Allow VFs to programs MAC/VLAN filters */
4205                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4206                                                   vf + 1);
4207                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4208                         status = be_cmd_set_fn_privileges(adapter,
4209                                                           vf_cfg->privileges |
4210                                                           BE_PRIV_FILTMGMT,
4211                                                           vf + 1);
4212                         if (!status) {
4213                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4214                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4215                                          vf);
4216                         }
4217                 }
4218
4219                 /* Allow full available bandwidth */
4220                 if (!old_vfs)
4221                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4222
4223                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4224                                                vf_cfg->if_handle, NULL,
4225                                                &spoofchk);
4226                 if (!status)
4227                         vf_cfg->spoofchk = spoofchk;
4228
4229                 if (!old_vfs) {
4230                         be_cmd_enable_vf(adapter, vf + 1);
4231                         be_cmd_set_logical_link_config(adapter,
4232                                                        IFLA_VF_LINK_STATE_AUTO,
4233                                                        vf+1);
4234                 }
4235         }
4236
4237         if (!old_vfs) {
4238                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4239                 if (status) {
4240                         dev_err(dev, "SRIOV enable failed\n");
4241                         adapter->num_vfs = 0;
4242                         goto err;
4243                 }
4244         }
4245
4246         if (BE3_chip(adapter)) {
4247                 /* On BE3, enable VEB only when SRIOV is enabled */
4248                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4249                                                adapter->if_handle,
4250                                                PORT_FWD_TYPE_VEB, 0);
4251                 if (status)
4252                         goto err;
4253         }
4254
4255         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4256         return 0;
4257 err:
4258         dev_err(dev, "VF setup failed\n");
4259         be_vf_clear(adapter);
4260         return status;
4261 }
4262
4263 /* Converting function_mode bits on BE3 to SH mc_type enums */
4264
4265 static u8 be_convert_mc_type(u32 function_mode)
4266 {
4267         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4268                 return vNIC1;
4269         else if (function_mode & QNQ_MODE)
4270                 return FLEX10;
4271         else if (function_mode & VNIC_MODE)
4272                 return vNIC2;
4273         else if (function_mode & UMC_ENABLED)
4274                 return UMC;
4275         else
4276                 return MC_NONE;
4277 }
4278
4279 /* On BE2/BE3 FW does not suggest the supported limits */
4280 static void BEx_get_resources(struct be_adapter *adapter,
4281                               struct be_resources *res)
4282 {
4283         bool use_sriov = adapter->num_vfs ? 1 : 0;
4284
4285         if (be_physfn(adapter))
4286                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4287         else
4288                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4289
4290         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4291
4292         if (be_is_mc(adapter)) {
4293                 /* Assuming that there are 4 channels per port,
4294                  * when multi-channel is enabled
4295                  */
4296                 if (be_is_qnq_mode(adapter))
4297                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4298                 else
4299                         /* In a non-qnq multichannel mode, the pvid
4300                          * takes up one vlan entry
4301                          */
4302                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4303         } else {
4304                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4305         }
4306
4307         res->max_mcast_mac = BE_MAX_MC;
4308
4309         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4310          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4311          *    *only* if it is RSS-capable.
4312          */
4313         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4314             be_virtfn(adapter) ||
4315             (be_is_mc(adapter) &&
4316              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4317                 res->max_tx_qs = 1;
4318         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4319                 struct be_resources super_nic_res = {0};
4320
4321                 /* On a SuperNIC profile, the driver needs to use the
4322                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4323                  */
4324                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4325                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4326                                           0);
4327                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4328                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4329         } else {
4330                 res->max_tx_qs = BE3_MAX_TX_QS;
4331         }
4332
4333         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4334             !use_sriov && be_physfn(adapter))
4335                 res->max_rss_qs = (adapter->be3_native) ?
4336                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4337         res->max_rx_qs = res->max_rss_qs + 1;
4338
4339         if (be_physfn(adapter))
4340                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4341                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4342         else
4343                 res->max_evt_qs = 1;
4344
4345         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4346         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4347         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4348                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4349 }
4350
4351 static void be_setup_init(struct be_adapter *adapter)
4352 {
4353         adapter->vlan_prio_bmap = 0xff;
4354         adapter->phy.link_speed = -1;
4355         adapter->if_handle = -1;
4356         adapter->be3_native = false;
4357         adapter->if_flags = 0;
4358         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4359         if (be_physfn(adapter))
4360                 adapter->cmd_privileges = MAX_PRIVILEGES;
4361         else
4362                 adapter->cmd_privileges = MIN_PRIVILEGES;
4363 }
4364
4365 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4366  * However, this HW limitation is not exposed to the host via any SLI cmd.
4367  * As a result, in the case of SRIOV and in particular multi-partition configs
4368  * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4369  * for distribution between the VFs. This self-imposed limit will determine the
4370  * no: of VFs for which RSS can be enabled.
4371  */
4372 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4373 {
4374         struct be_port_resources port_res = {0};
4375         u8 rss_tables_on_port;
4376         u16 max_vfs = be_max_vfs(adapter);
4377
4378         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4379                                   RESOURCE_LIMITS, 0);
4380
4381         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4382
4383         /* Each PF Pool's RSS Tables limit =
4384          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4385          */
4386         adapter->pool_res.max_rss_tables =
4387                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4388 }
4389
4390 static int be_get_sriov_config(struct be_adapter *adapter)
4391 {
4392         struct be_resources res = {0};
4393         int max_vfs, old_vfs;
4394
4395         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4396                                   RESOURCE_LIMITS, 0);
4397
4398         /* Some old versions of BE3 FW don't report max_vfs value */
4399         if (BE3_chip(adapter) && !res.max_vfs) {
4400                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4401                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4402         }
4403
4404         adapter->pool_res = res;
4405
4406         /* If during previous unload of the driver, the VFs were not disabled,
4407          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4408          * Instead use the TotalVFs value stored in the pci-dev struct.
4409          */
4410         old_vfs = pci_num_vf(adapter->pdev);
4411         if (old_vfs) {
4412                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4413                          old_vfs);
4414
4415                 adapter->pool_res.max_vfs =
4416                         pci_sriov_get_totalvfs(adapter->pdev);
4417                 adapter->num_vfs = old_vfs;
4418         }
4419
4420         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4421                 be_calculate_pf_pool_rss_tables(adapter);
4422                 dev_info(&adapter->pdev->dev,
4423                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4424                          be_max_pf_pool_rss_tables(adapter));
4425         }
4426         return 0;
4427 }
4428
4429 static void be_alloc_sriov_res(struct be_adapter *adapter)
4430 {
4431         int old_vfs = pci_num_vf(adapter->pdev);
4432         struct  be_resources vft_res = {0};
4433         int status;
4434
4435         be_get_sriov_config(adapter);
4436
4437         if (!old_vfs)
4438                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4439
4440         /* When the HW is in SRIOV capable configuration, the PF-pool
4441          * resources are given to PF during driver load, if there are no
4442          * old VFs. This facility is not available in BE3 FW.
4443          * Also, this is done by FW in Lancer chip.
4444          */
4445         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4446                 be_calculate_vf_res(adapter, 0, &vft_res);
4447                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4448                                                  &vft_res);
4449                 if (status)
4450                         dev_err(&adapter->pdev->dev,
4451                                 "Failed to optimize SRIOV resources\n");
4452         }
4453 }
4454
4455 static int be_get_resources(struct be_adapter *adapter)
4456 {
4457         struct device *dev = &adapter->pdev->dev;
4458         struct be_resources res = {0};
4459         int status;
4460
4461         /* For Lancer, SH etc read per-function resource limits from FW.
4462          * GET_FUNC_CONFIG returns per function guaranteed limits.
4463          * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
4464          */
4465         if (BEx_chip(adapter)) {
4466                 BEx_get_resources(adapter, &res);
4467         } else {
4468                 status = be_cmd_get_func_config(adapter, &res);
4469                 if (status)
4470                         return status;
4471
4472                 /* If a deafault RXQ must be created, we'll use up one RSSQ*/
4473                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4474                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4475                         res.max_rss_qs -= 1;
4476         }
4477
4478         /* If RoCE is supported stash away half the EQs for RoCE */
4479         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4480                                 res.max_evt_qs / 2 : res.max_evt_qs;
4481         adapter->res = res;
4482
4483         /* If FW supports RSS default queue, then skip creating non-RSS
4484          * queue for non-IP traffic.
4485          */
4486         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4487                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4488
4489         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4490                  be_max_txqs(adapter), be_max_rxqs(adapter),
4491                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4492                  be_max_vfs(adapter));
4493         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4494                  be_max_uc(adapter), be_max_mc(adapter),
4495                  be_max_vlans(adapter));
4496
4497         /* Ensure RX and TX queues are created in pairs at init time */
4498         adapter->cfg_num_rx_irqs =
4499                                 min_t(u16, netif_get_num_default_rss_queues(),
4500                                       be_max_qp_irqs(adapter));
4501         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4502         return 0;
4503 }
4504
4505 static int be_get_config(struct be_adapter *adapter)
4506 {
4507         int status, level;
4508         u16 profile_id;
4509
4510         status = be_cmd_get_cntl_attributes(adapter);
4511         if (status)
4512                 return status;
4513
4514         status = be_cmd_query_fw_cfg(adapter);
4515         if (status)
4516                 return status;
4517
4518         if (!lancer_chip(adapter) && be_physfn(adapter))
4519                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4520
4521         if (BEx_chip(adapter)) {
4522                 level = be_cmd_get_fw_log_level(adapter);
4523                 adapter->msg_enable =
4524                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4525         }
4526
4527         be_cmd_get_acpi_wol_cap(adapter);
4528         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4529         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4530
4531         be_cmd_query_port_name(adapter);
4532
4533         if (be_physfn(adapter)) {
4534                 status = be_cmd_get_active_profile(adapter, &profile_id);
4535                 if (!status)
4536                         dev_info(&adapter->pdev->dev,
4537                                  "Using profile 0x%x\n", profile_id);
4538         }
4539
4540         return 0;
4541 }
4542
4543 static int be_mac_setup(struct be_adapter *adapter)
4544 {
4545         u8 mac[ETH_ALEN];
4546         int status;
4547
4548         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4549                 status = be_cmd_get_perm_mac(adapter, mac);
4550                 if (status)
4551                         return status;
4552
4553                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4554                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4555         }
4556
4557         return 0;
4558 }
4559
4560 static void be_schedule_worker(struct be_adapter *adapter)
4561 {
4562         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4563         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4564 }
4565
4566 static void be_destroy_err_recovery_workq(void)
4567 {
4568         if (!be_err_recovery_workq)
4569                 return;
4570
4571         flush_workqueue(be_err_recovery_workq);
4572         destroy_workqueue(be_err_recovery_workq);
4573         be_err_recovery_workq = NULL;
4574 }
4575
4576 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4577 {
4578         struct be_error_recovery *err_rec = &adapter->error_recovery;
4579
4580         if (!be_err_recovery_workq)
4581                 return;
4582
4583         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4584                            msecs_to_jiffies(delay));
4585         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4586 }
4587
4588 static int be_setup_queues(struct be_adapter *adapter)
4589 {
4590         struct net_device *netdev = adapter->netdev;
4591         int status;
4592
4593         status = be_evt_queues_create(adapter);
4594         if (status)
4595                 goto err;
4596
4597         status = be_tx_qs_create(adapter);
4598         if (status)
4599                 goto err;
4600
4601         status = be_rx_cqs_create(adapter);
4602         if (status)
4603                 goto err;
4604
4605         status = be_mcc_queues_create(adapter);
4606         if (status)
4607                 goto err;
4608
4609         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4610         if (status)
4611                 goto err;
4612
4613         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4614         if (status)
4615                 goto err;
4616
4617         return 0;
4618 err:
4619         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4620         return status;
4621 }
4622
4623 static int be_if_create(struct be_adapter *adapter)
4624 {
4625         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4626         u32 cap_flags = be_if_cap_flags(adapter);
4627         int status;
4628
4629         /* alloc required memory for other filtering fields */
4630         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4631                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4632         if (!adapter->pmac_id)
4633                 return -ENOMEM;
4634
4635         adapter->mc_list = kcalloc(be_max_mc(adapter),
4636                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4637         if (!adapter->mc_list)
4638                 return -ENOMEM;
4639
4640         adapter->uc_list = kcalloc(be_max_uc(adapter),
4641                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4642         if (!adapter->uc_list)
4643                 return -ENOMEM;
4644
4645         if (adapter->cfg_num_rx_irqs == 1)
4646                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4647
4648         en_flags &= cap_flags;
4649         /* will enable all the needed filter flags in be_open() */
4650         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4651                                   &adapter->if_handle, 0);
4652
4653         if (status)
4654                 return status;
4655
4656         return 0;
4657 }
4658
4659 int be_update_queues(struct be_adapter *adapter)
4660 {
4661         struct net_device *netdev = adapter->netdev;
4662         int status;
4663
4664         if (netif_running(netdev))
4665                 be_close(netdev);
4666
4667         be_cancel_worker(adapter);
4668
4669         /* If any vectors have been shared with RoCE we cannot re-program
4670          * the MSIx table.
4671          */
4672         if (!adapter->num_msix_roce_vec)
4673                 be_msix_disable(adapter);
4674
4675         be_clear_queues(adapter);
4676         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4677         if (status)
4678                 return status;
4679
4680         if (!msix_enabled(adapter)) {
4681                 status = be_msix_enable(adapter);
4682                 if (status)
4683                         return status;
4684         }
4685
4686         status = be_if_create(adapter);
4687         if (status)
4688                 return status;
4689
4690         status = be_setup_queues(adapter);
4691         if (status)
4692                 return status;
4693
4694         be_schedule_worker(adapter);
4695
4696         if (netif_running(netdev))
4697                 status = be_open(netdev);
4698
4699         return status;
4700 }
4701
4702 static inline int fw_major_num(const char *fw_ver)
4703 {
4704         int fw_major = 0, i;
4705
4706         i = sscanf(fw_ver, "%d.", &fw_major);
4707         if (i != 1)
4708                 return 0;
4709
4710         return fw_major;
4711 }
4712
4713 /* If it is error recovery, FLR the PF
4714  * Else if any VFs are already enabled don't FLR the PF
4715  */
4716 static bool be_reset_required(struct be_adapter *adapter)
4717 {
4718         if (be_error_recovering(adapter))
4719                 return true;
4720         else
4721                 return pci_num_vf(adapter->pdev) == 0;
4722 }
4723
4724 /* Wait for the FW to be ready and perform the required initialization */
4725 static int be_func_init(struct be_adapter *adapter)
4726 {
4727         int status;
4728
4729         status = be_fw_wait_ready(adapter);
4730         if (status)
4731                 return status;
4732
4733         /* FW is now ready; clear errors to allow cmds/doorbell */
4734         be_clear_error(adapter, BE_CLEAR_ALL);
4735
4736         if (be_reset_required(adapter)) {
4737                 status = be_cmd_reset_function(adapter);
4738                 if (status)
4739                         return status;
4740
4741                 /* Wait for interrupts to quiesce after an FLR */
4742                 msleep(100);
4743         }
4744
4745         /* Tell FW we're ready to fire cmds */
4746         status = be_cmd_fw_init(adapter);
4747         if (status)
4748                 return status;
4749
4750         /* Allow interrupts for other ULPs running on NIC function */
4751         be_intr_set(adapter, true);
4752
4753         return 0;
4754 }
4755
4756 static int be_setup(struct be_adapter *adapter)
4757 {
4758         struct device *dev = &adapter->pdev->dev;
4759         int status;
4760
4761         status = be_func_init(adapter);
4762         if (status)
4763                 return status;
4764
4765         be_setup_init(adapter);
4766
4767         if (!lancer_chip(adapter))
4768                 be_cmd_req_native_mode(adapter);
4769
4770         /* invoke this cmd first to get pf_num and vf_num which are needed
4771          * for issuing profile related cmds
4772          */
4773         if (!BEx_chip(adapter)) {
4774                 status = be_cmd_get_func_config(adapter, NULL);
4775                 if (status)
4776                         return status;
4777         }
4778
4779         status = be_get_config(adapter);
4780         if (status)
4781                 goto err;
4782
4783         if (!BE2_chip(adapter) && be_physfn(adapter))
4784                 be_alloc_sriov_res(adapter);
4785
4786         status = be_get_resources(adapter);
4787         if (status)
4788                 goto err;
4789
4790         status = be_msix_enable(adapter);
4791         if (status)
4792                 goto err;
4793
4794         /* will enable all the needed filter flags in be_open() */
4795         status = be_if_create(adapter);
4796         if (status)
4797                 goto err;
4798
4799         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4800         rtnl_lock();
4801         status = be_setup_queues(adapter);
4802         rtnl_unlock();
4803         if (status)
4804                 goto err;
4805
4806         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4807
4808         status = be_mac_setup(adapter);
4809         if (status)
4810                 goto err;
4811
4812         be_cmd_get_fw_ver(adapter);
4813         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4814
4815         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4816                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4817                         adapter->fw_ver);
4818                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4819         }
4820
4821         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4822                                          adapter->rx_fc);
4823         if (status)
4824                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4825                                         &adapter->rx_fc);
4826
4827         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4828                  adapter->tx_fc, adapter->rx_fc);
4829
4830         if (be_physfn(adapter))
4831                 be_cmd_set_logical_link_config(adapter,
4832                                                IFLA_VF_LINK_STATE_AUTO, 0);
4833
4834         /* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4835          * confusing a linux bridge or OVS that it might be connected to.
4836          * Set the EVB to PASSTHRU mode which effectively disables the EVB
4837          * when SRIOV is not enabled.
4838          */
4839         if (BE3_chip(adapter))
4840                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4841                                       PORT_FWD_TYPE_PASSTHRU, 0);
4842
4843         if (adapter->num_vfs)
4844                 be_vf_setup(adapter);
4845
4846         status = be_cmd_get_phy_info(adapter);
4847         if (!status && be_pause_supported(adapter))
4848                 adapter->phy.fc_autoneg = 1;
4849
4850         if (be_physfn(adapter) && !lancer_chip(adapter))
4851                 be_cmd_set_features(adapter);
4852
4853         be_schedule_worker(adapter);
4854         adapter->flags |= BE_FLAGS_SETUP_DONE;
4855         return 0;
4856 err:
4857         be_clear(adapter);
4858         return status;
4859 }
4860
4861 #ifdef CONFIG_NET_POLL_CONTROLLER
4862 static void be_netpoll(struct net_device *netdev)
4863 {
4864         struct be_adapter *adapter = netdev_priv(netdev);
4865         struct be_eq_obj *eqo;
4866         int i;
4867
4868         for_all_evt_queues(adapter, eqo, i) {
4869                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4870                 napi_schedule(&eqo->napi);
4871         }
4872 }
4873 #endif
4874
4875 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4876 {
4877         const struct firmware *fw;
4878         int status;
4879
4880         if (!netif_running(adapter->netdev)) {
4881                 dev_err(&adapter->pdev->dev,
4882                         "Firmware load not allowed (interface is down)\n");
4883                 return -ENETDOWN;
4884         }
4885
4886         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4887         if (status)
4888                 goto fw_exit;
4889
4890         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4891
4892         if (lancer_chip(adapter))
4893                 status = lancer_fw_download(adapter, fw);
4894         else
4895                 status = be_fw_download(adapter, fw);
4896
4897         if (!status)
4898                 be_cmd_get_fw_ver(adapter);
4899
4900 fw_exit:
4901         release_firmware(fw);
4902         return status;
4903 }
4904
4905 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4906                                  u16 flags)
4907 {
4908         struct be_adapter *adapter = netdev_priv(dev);
4909         struct nlattr *attr, *br_spec;
4910         int rem;
4911         int status = 0;
4912         u16 mode = 0;
4913
4914         if (!sriov_enabled(adapter))
4915                 return -EOPNOTSUPP;
4916
4917         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4918         if (!br_spec)
4919                 return -EINVAL;
4920
4921         nla_for_each_nested(attr, br_spec, rem) {
4922                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4923                         continue;
4924
4925                 if (nla_len(attr) < sizeof(mode))
4926                         return -EINVAL;
4927
4928                 mode = nla_get_u16(attr);
4929                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4930                         return -EOPNOTSUPP;
4931
4932                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4933                         return -EINVAL;
4934
4935                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4936                                                adapter->if_handle,
4937                                                mode == BRIDGE_MODE_VEPA ?
4938                                                PORT_FWD_TYPE_VEPA :
4939                                                PORT_FWD_TYPE_VEB, 0);
4940                 if (status)
4941                         goto err;
4942
4943                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4944                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4945
4946                 return status;
4947         }
4948 err:
4949         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4950                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4951
4952         return status;
4953 }
4954
4955 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4956                                  struct net_device *dev, u32 filter_mask,
4957                                  int nlflags)
4958 {
4959         struct be_adapter *adapter = netdev_priv(dev);
4960         int status = 0;
4961         u8 hsw_mode;
4962
4963         /* BE and Lancer chips support VEB mode only */
4964         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4965                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4966                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4967                         return 0;
4968                 hsw_mode = PORT_FWD_TYPE_VEB;
4969         } else {
4970                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4971                                                adapter->if_handle, &hsw_mode,
4972                                                NULL);
4973                 if (status)
4974                         return 0;
4975
4976                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4977                         return 0;
4978         }
4979
4980         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4981                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4982                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4983                                        0, 0, nlflags, filter_mask, NULL);
4984 }
4985
4986 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4987                                          void (*func)(struct work_struct *))
4988 {
4989         struct be_cmd_work *work;
4990
4991         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4992         if (!work) {
4993                 dev_err(&adapter->pdev->dev,
4994                         "be_work memory allocation failed\n");
4995                 return NULL;
4996         }
4997
4998         INIT_WORK(&work->work, func);
4999         work->adapter = adapter;
5000         return work;
5001 }
5002
5003 /* VxLAN offload Notes:
5004  *
5005  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5006  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5007  * is expected to work across all types of IP tunnels once exported. Skyhawk
5008  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5009  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5010  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5011  * those other tunnels are unexported on the fly through ndo_features_check().
5012  *
5013  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5014  * adds more than one port, disable offloads and don't re-enable them again
5015  * until after all the tunnels are removed.
5016  */
5017 static void be_work_add_vxlan_port(struct work_struct *work)
5018 {
5019         struct be_cmd_work *cmd_work =
5020                                 container_of(work, struct be_cmd_work, work);
5021         struct be_adapter *adapter = cmd_work->adapter;
5022         struct net_device *netdev = adapter->netdev;
5023         struct device *dev = &adapter->pdev->dev;
5024         __be16 port = cmd_work->info.vxlan_port;
5025         int status;
5026
5027         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
5028                 adapter->vxlan_port_aliases++;
5029                 goto done;
5030         }
5031
5032         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5033                 dev_info(dev,
5034                          "Only one UDP port supported for VxLAN offloads\n");
5035                 dev_info(dev, "Disabling VxLAN offloads\n");
5036                 adapter->vxlan_port_count++;
5037                 goto err;
5038         }
5039
5040         if (adapter->vxlan_port_count++ >= 1)
5041                 goto done;
5042
5043         status = be_cmd_manage_iface(adapter, adapter->if_handle,
5044                                      OP_CONVERT_NORMAL_TO_TUNNEL);
5045         if (status) {
5046                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
5047                 goto err;
5048         }
5049
5050         status = be_cmd_set_vxlan_port(adapter, port);
5051         if (status) {
5052                 dev_warn(dev, "Failed to add VxLAN port\n");
5053                 goto err;
5054         }
5055         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
5056         adapter->vxlan_port = port;
5057
5058         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
5059                                    NETIF_F_TSO | NETIF_F_TSO6 |
5060                                    NETIF_F_GSO_UDP_TUNNEL;
5061         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
5062         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
5063
5064         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
5065                  be16_to_cpu(port));
5066         goto done;
5067 err:
5068         be_disable_vxlan_offloads(adapter);
5069 done:
5070         kfree(cmd_work);
5071 }
5072
5073 static void be_work_del_vxlan_port(struct work_struct *work)
5074 {
5075         struct be_cmd_work *cmd_work =
5076                                 container_of(work, struct be_cmd_work, work);
5077         struct be_adapter *adapter = cmd_work->adapter;
5078         __be16 port = cmd_work->info.vxlan_port;
5079
5080         if (adapter->vxlan_port != port)
5081                 goto done;
5082
5083         if (adapter->vxlan_port_aliases) {
5084                 adapter->vxlan_port_aliases--;
5085                 goto out;
5086         }
5087
5088         be_disable_vxlan_offloads(adapter);
5089
5090         dev_info(&adapter->pdev->dev,
5091                  "Disabled VxLAN offloads for UDP port %d\n",
5092                  be16_to_cpu(port));
5093 done:
5094         adapter->vxlan_port_count--;
5095 out:
5096         kfree(cmd_work);
5097 }
5098
5099 static void be_cfg_vxlan_port(struct net_device *netdev,
5100                               struct udp_tunnel_info *ti,
5101                               void (*func)(struct work_struct *))
5102 {
5103         struct be_adapter *adapter = netdev_priv(netdev);
5104         struct be_cmd_work *cmd_work;
5105
5106         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5107                 return;
5108
5109         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5110                 return;
5111
5112         cmd_work = be_alloc_work(adapter, func);
5113         if (cmd_work) {
5114                 cmd_work->info.vxlan_port = ti->port;
5115                 queue_work(be_wq, &cmd_work->work);
5116         }
5117 }
5118
5119 static void be_del_vxlan_port(struct net_device *netdev,
5120                               struct udp_tunnel_info *ti)
5121 {
5122         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5123 }
5124
5125 static void be_add_vxlan_port(struct net_device *netdev,
5126                               struct udp_tunnel_info *ti)
5127 {
5128         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5129 }
5130
5131 static netdev_features_t be_features_check(struct sk_buff *skb,
5132                                            struct net_device *dev,
5133                                            netdev_features_t features)
5134 {
5135         struct be_adapter *adapter = netdev_priv(dev);
5136         u8 l4_hdr = 0;
5137
5138         /* The code below restricts offload features for some tunneled packets.
5139          * Offload features for normal (non tunnel) packets are unchanged.
5140          */
5141         if (!skb->encapsulation ||
5142             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5143                 return features;
5144
5145         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5146          * should disable tunnel offload features if it's not a VxLAN packet,
5147          * as tunnel offloads have been enabled only for VxLAN. This is done to
5148          * allow other tunneled traffic like GRE work fine while VxLAN
5149          * offloads are configured in Skyhawk-R.
5150          */
5151         switch (vlan_get_protocol(skb)) {
5152         case htons(ETH_P_IP):
5153                 l4_hdr = ip_hdr(skb)->protocol;
5154                 break;
5155         case htons(ETH_P_IPV6):
5156                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5157                 break;
5158         default:
5159                 return features;
5160         }
5161
5162         if (l4_hdr != IPPROTO_UDP ||
5163             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5164             skb->inner_protocol != htons(ETH_P_TEB) ||
5165             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5166             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
5167                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5168
5169         return features;
5170 }
5171
5172 static int be_get_phys_port_id(struct net_device *dev,
5173                                struct netdev_phys_item_id *ppid)
5174 {
5175         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5176         struct be_adapter *adapter = netdev_priv(dev);
5177         u8 *id;
5178
5179         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5180                 return -ENOSPC;
5181
5182         ppid->id[0] = adapter->hba_port_num + 1;
5183         id = &ppid->id[1];
5184         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5185              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5186                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5187
5188         ppid->id_len = id_len;
5189
5190         return 0;
5191 }
5192
5193 static void be_set_rx_mode(struct net_device *dev)
5194 {
5195         struct be_adapter *adapter = netdev_priv(dev);
5196         struct be_cmd_work *work;
5197
5198         work = be_alloc_work(adapter, be_work_set_rx_mode);
5199         if (work)
5200                 queue_work(be_wq, &work->work);
5201 }
5202
5203 static const struct net_device_ops be_netdev_ops = {
5204         .ndo_open               = be_open,
5205         .ndo_stop               = be_close,
5206         .ndo_start_xmit         = be_xmit,
5207         .ndo_set_rx_mode        = be_set_rx_mode,
5208         .ndo_set_mac_address    = be_mac_addr_set,
5209         .ndo_change_mtu         = be_change_mtu,
5210         .ndo_get_stats64        = be_get_stats64,
5211         .ndo_validate_addr      = eth_validate_addr,
5212         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5213         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5214         .ndo_set_vf_mac         = be_set_vf_mac,
5215         .ndo_set_vf_vlan        = be_set_vf_vlan,
5216         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5217         .ndo_get_vf_config      = be_get_vf_config,
5218         .ndo_set_vf_link_state  = be_set_vf_link_state,
5219         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5220 #ifdef CONFIG_NET_POLL_CONTROLLER
5221         .ndo_poll_controller    = be_netpoll,
5222 #endif
5223         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5224         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5225 #ifdef CONFIG_NET_RX_BUSY_POLL
5226         .ndo_busy_poll          = be_busy_poll,
5227 #endif
5228         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5229         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5230         .ndo_features_check     = be_features_check,
5231         .ndo_get_phys_port_id   = be_get_phys_port_id,
5232 };
5233
5234 static void be_netdev_init(struct net_device *netdev)
5235 {
5236         struct be_adapter *adapter = netdev_priv(netdev);
5237
5238         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5239                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5240                 NETIF_F_HW_VLAN_CTAG_TX;
5241         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5242                 netdev->hw_features |= NETIF_F_RXHASH;
5243
5244         netdev->features |= netdev->hw_features |
5245                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5246
5247         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5248                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5249
5250         netdev->priv_flags |= IFF_UNICAST_FLT;
5251
5252         netdev->flags |= IFF_MULTICAST;
5253
5254         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5255
5256         netdev->netdev_ops = &be_netdev_ops;
5257
5258         netdev->ethtool_ops = &be_ethtool_ops;
5259 }
5260
5261 static void be_cleanup(struct be_adapter *adapter)
5262 {
5263         struct net_device *netdev = adapter->netdev;
5264
5265         rtnl_lock();
5266         netif_device_detach(netdev);
5267         if (netif_running(netdev))
5268                 be_close(netdev);
5269         rtnl_unlock();
5270
5271         be_clear(adapter);
5272 }
5273
5274 static int be_resume(struct be_adapter *adapter)
5275 {
5276         struct net_device *netdev = adapter->netdev;
5277         int status;
5278
5279         status = be_setup(adapter);
5280         if (status)
5281                 return status;
5282
5283         rtnl_lock();
5284         if (netif_running(netdev))
5285                 status = be_open(netdev);
5286         rtnl_unlock();
5287
5288         if (status)
5289                 return status;
5290
5291         netif_device_attach(netdev);
5292
5293         return 0;
5294 }
5295
5296 static void be_soft_reset(struct be_adapter *adapter)
5297 {
5298         u32 val;
5299
5300         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5301         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5302         val |= SLIPORT_SOFTRESET_SR_MASK;
5303         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5304 }
5305
5306 static bool be_err_is_recoverable(struct be_adapter *adapter)
5307 {
5308         struct be_error_recovery *err_rec = &adapter->error_recovery;
5309         unsigned long initial_idle_time =
5310                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5311         unsigned long recovery_interval =
5312                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5313         u16 ue_err_code;
5314         u32 val;
5315
5316         val = be_POST_stage_get(adapter);
5317         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5318                 return false;
5319         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5320         if (ue_err_code == 0)
5321                 return false;
5322
5323         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5324                 ue_err_code);
5325
5326         if (jiffies - err_rec->probe_time <= initial_idle_time) {
5327                 dev_err(&adapter->pdev->dev,
5328                         "Cannot recover within %lu sec from driver load\n",
5329                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5330                 return false;
5331         }
5332
5333         if (err_rec->last_recovery_time &&
5334             (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
5335                 dev_err(&adapter->pdev->dev,
5336                         "Cannot recover within %lu sec from last recovery\n",
5337                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5338                 return false;
5339         }
5340
5341         if (ue_err_code == err_rec->last_err_code) {
5342                 dev_err(&adapter->pdev->dev,
5343                         "Cannot recover from a consecutive TPE error\n");
5344                 return false;
5345         }
5346
5347         err_rec->last_recovery_time = jiffies;
5348         err_rec->last_err_code = ue_err_code;
5349         return true;
5350 }
5351
5352 static int be_tpe_recover(struct be_adapter *adapter)
5353 {
5354         struct be_error_recovery *err_rec = &adapter->error_recovery;
5355         int status = -EAGAIN;
5356         u32 val;
5357
5358         switch (err_rec->recovery_state) {
5359         case ERR_RECOVERY_ST_NONE:
5360                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5361                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5362                 break;
5363
5364         case ERR_RECOVERY_ST_DETECT:
5365                 val = be_POST_stage_get(adapter);
5366                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5367                     POST_STAGE_RECOVERABLE_ERR) {
5368                         dev_err(&adapter->pdev->dev,
5369                                 "Unrecoverable HW error detected: 0x%x\n", val);
5370                         status = -EINVAL;
5371                         err_rec->resched_delay = 0;
5372                         break;
5373                 }
5374
5375                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5376
5377                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5378                  * milliseconds before it checks for final error status in
5379                  * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5380                  * If it does, then PF0 initiates a Soft Reset.
5381                  */
5382                 if (adapter->pf_num == 0) {
5383                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5384                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5385                                         ERR_RECOVERY_UE_DETECT_DURATION;
5386                         break;
5387                 }
5388
5389                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5390                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5391                                         ERR_RECOVERY_UE_DETECT_DURATION;
5392                 break;
5393
5394         case ERR_RECOVERY_ST_RESET:
5395                 if (!be_err_is_recoverable(adapter)) {
5396                         dev_err(&adapter->pdev->dev,
5397                                 "Failed to meet recovery criteria\n");
5398                         status = -EIO;
5399                         err_rec->resched_delay = 0;
5400                         break;
5401                 }
5402                 be_soft_reset(adapter);
5403                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5404                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5405                                         err_rec->ue_to_reset_time;
5406                 break;
5407
5408         case ERR_RECOVERY_ST_PRE_POLL:
5409                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5410                 err_rec->resched_delay = 0;
5411                 status = 0;                     /* done */
5412                 break;
5413
5414         default:
5415                 status = -EINVAL;
5416                 err_rec->resched_delay = 0;
5417                 break;
5418         }
5419
5420         return status;
5421 }
5422
5423 static int be_err_recover(struct be_adapter *adapter)
5424 {
5425         int status;
5426
5427         if (!lancer_chip(adapter)) {
5428                 if (!adapter->error_recovery.recovery_supported ||
5429                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5430                         return -EIO;
5431                 status = be_tpe_recover(adapter);
5432                 if (status)
5433                         goto err;
5434         }
5435
5436         /* Wait for adapter to reach quiescent state before
5437          * destroying queues
5438          */
5439         status = be_fw_wait_ready(adapter);
5440         if (status)
5441                 goto err;
5442
5443         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5444
5445         be_cleanup(adapter);
5446
5447         status = be_resume(adapter);
5448         if (status)
5449                 goto err;
5450
5451         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5452
5453 err:
5454         return status;
5455 }
5456
5457 static void be_err_detection_task(struct work_struct *work)
5458 {
5459         struct be_error_recovery *err_rec =
5460                         container_of(work, struct be_error_recovery,
5461                                      err_detection_work.work);
5462         struct be_adapter *adapter =
5463                         container_of(err_rec, struct be_adapter,
5464                                      error_recovery);
5465         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5466         struct device *dev = &adapter->pdev->dev;
5467         int recovery_status;
5468
5469         be_detect_error(adapter);
5470         if (!be_check_error(adapter, BE_ERROR_HW))
5471                 goto reschedule_task;
5472
5473         recovery_status = be_err_recover(adapter);
5474         if (!recovery_status) {
5475                 err_rec->recovery_retries = 0;
5476                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5477                 dev_info(dev, "Adapter recovery successful\n");
5478                 goto reschedule_task;
5479         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5480                 /* BEx/SH recovery state machine */
5481                 if (adapter->pf_num == 0 &&
5482                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5483                         dev_err(&adapter->pdev->dev,
5484                                 "Adapter recovery in progress\n");
5485                 resched_delay = err_rec->resched_delay;
5486                 goto reschedule_task;
5487         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5488                 /* For VFs, check if PF have allocated resources
5489                  * every second.
5490                  */
5491                 dev_err(dev, "Re-trying adapter recovery\n");
5492                 goto reschedule_task;
5493         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5494                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5495                 /* In case of another error during recovery, it takes 30 sec
5496                  * for adapter to come out of error. Retry error recovery after
5497                  * this time interval.
5498                  */
5499                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5500                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5501                 goto reschedule_task;
5502         } else {
5503                 dev_err(dev, "Adapter recovery failed\n");
5504                 dev_err(dev, "Please reboot server to recover\n");
5505         }
5506
5507         return;
5508
5509 reschedule_task:
5510         be_schedule_err_detection(adapter, resched_delay);
5511 }
5512
5513 static void be_log_sfp_info(struct be_adapter *adapter)
5514 {
5515         int status;
5516
5517         status = be_cmd_query_sfp_info(adapter);
5518         if (!status) {
5519                 dev_err(&adapter->pdev->dev,
5520                         "Port %c: %s Vendor: %s part no: %s",
5521                         adapter->port_name,
5522                         be_misconfig_evt_port_state[adapter->phy_state],
5523                         adapter->phy.vendor_name,
5524                         adapter->phy.vendor_pn);
5525         }
5526         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5527 }
5528
5529 static void be_worker(struct work_struct *work)
5530 {
5531         struct be_adapter *adapter =
5532                 container_of(work, struct be_adapter, work.work);
5533         struct be_rx_obj *rxo;
5534         int i;
5535
5536         if (be_physfn(adapter) &&
5537             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5538                 be_cmd_get_die_temperature(adapter);
5539
5540         /* when interrupts are not yet enabled, just reap any pending
5541          * mcc completions
5542          */
5543         if (!netif_running(adapter->netdev)) {
5544                 local_bh_disable();
5545                 be_process_mcc(adapter);
5546                 local_bh_enable();
5547                 goto reschedule;
5548         }
5549
5550         if (!adapter->stats_cmd_sent) {
5551                 if (lancer_chip(adapter))
5552                         lancer_cmd_get_pport_stats(adapter,
5553                                                    &adapter->stats_cmd);
5554                 else
5555                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5556         }
5557
5558         for_all_rx_queues(adapter, rxo, i) {
5559                 /* Replenish RX-queues starved due to memory
5560                  * allocation failures.
5561                  */
5562                 if (rxo->rx_post_starved)
5563                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5564         }
5565
5566         /* EQ-delay update for Skyhawk is done while notifying EQ */
5567         if (!skyhawk_chip(adapter))
5568                 be_eqd_update(adapter, false);
5569
5570         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5571                 be_log_sfp_info(adapter);
5572
5573 reschedule:
5574         adapter->work_counter++;
5575         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5576 }
5577
5578 static void be_unmap_pci_bars(struct be_adapter *adapter)
5579 {
5580         if (adapter->csr)
5581                 pci_iounmap(adapter->pdev, adapter->csr);
5582         if (adapter->db)
5583                 pci_iounmap(adapter->pdev, adapter->db);
5584         if (adapter->pcicfg && adapter->pcicfg_mapped)
5585                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5586 }
5587
5588 static int db_bar(struct be_adapter *adapter)
5589 {
5590         if (lancer_chip(adapter) || be_virtfn(adapter))
5591                 return 0;
5592         else
5593                 return 4;
5594 }
5595
5596 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5597 {
5598         if (skyhawk_chip(adapter)) {
5599                 adapter->roce_db.size = 4096;
5600                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5601                                                               db_bar(adapter));
5602                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5603                                                                db_bar(adapter));
5604         }
5605         return 0;
5606 }
5607
5608 static int be_map_pci_bars(struct be_adapter *adapter)
5609 {
5610         struct pci_dev *pdev = adapter->pdev;
5611         u8 __iomem *addr;
5612         u32 sli_intf;
5613
5614         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5615         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5616                                 SLI_INTF_FAMILY_SHIFT;
5617         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5618
5619         if (BEx_chip(adapter) && be_physfn(adapter)) {
5620                 adapter->csr = pci_iomap(pdev, 2, 0);
5621                 if (!adapter->csr)
5622                         return -ENOMEM;
5623         }
5624
5625         addr = pci_iomap(pdev, db_bar(adapter), 0);
5626         if (!addr)
5627                 goto pci_map_err;
5628         adapter->db = addr;
5629
5630         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5631                 if (be_physfn(adapter)) {
5632                         /* PCICFG is the 2nd BAR in BE2 */
5633                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5634                         if (!addr)
5635                                 goto pci_map_err;
5636                         adapter->pcicfg = addr;
5637                         adapter->pcicfg_mapped = true;
5638                 } else {
5639                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5640                         adapter->pcicfg_mapped = false;
5641                 }
5642         }
5643
5644         be_roce_map_pci_bars(adapter);
5645         return 0;
5646
5647 pci_map_err:
5648         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5649         be_unmap_pci_bars(adapter);
5650         return -ENOMEM;
5651 }
5652
5653 static void be_drv_cleanup(struct be_adapter *adapter)
5654 {
5655         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5656         struct device *dev = &adapter->pdev->dev;
5657
5658         if (mem->va)
5659                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5660
5661         mem = &adapter->rx_filter;
5662         if (mem->va)
5663                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5664
5665         mem = &adapter->stats_cmd;
5666         if (mem->va)
5667                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5668 }
5669
5670 /* Allocate and initialize various fields in be_adapter struct */
5671 static int be_drv_init(struct be_adapter *adapter)
5672 {
5673         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5674         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5675         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5676         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5677         struct device *dev = &adapter->pdev->dev;
5678         int status = 0;
5679
5680         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5681         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5682                                                  &mbox_mem_alloc->dma,
5683                                                  GFP_KERNEL);
5684         if (!mbox_mem_alloc->va)
5685                 return -ENOMEM;
5686
5687         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5688         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5689         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5690
5691         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5692         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5693                                             &rx_filter->dma, GFP_KERNEL);
5694         if (!rx_filter->va) {
5695                 status = -ENOMEM;
5696                 goto free_mbox;
5697         }
5698
5699         if (lancer_chip(adapter))
5700                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5701         else if (BE2_chip(adapter))
5702                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5703         else if (BE3_chip(adapter))
5704                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5705         else
5706                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5707         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5708                                             &stats_cmd->dma, GFP_KERNEL);
5709         if (!stats_cmd->va) {
5710                 status = -ENOMEM;
5711                 goto free_rx_filter;
5712         }
5713
5714         mutex_init(&adapter->mbox_lock);
5715         mutex_init(&adapter->mcc_lock);
5716         mutex_init(&adapter->rx_filter_lock);
5717         spin_lock_init(&adapter->mcc_cq_lock);
5718         init_completion(&adapter->et_cmd_compl);
5719
5720         pci_save_state(adapter->pdev);
5721
5722         INIT_DELAYED_WORK(&adapter->work, be_worker);
5723
5724         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5725         adapter->error_recovery.resched_delay = 0;
5726         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5727                           be_err_detection_task);
5728
5729         adapter->rx_fc = true;
5730         adapter->tx_fc = true;
5731
5732         /* Must be a power of 2 or else MODULO will BUG_ON */
5733         adapter->be_get_temp_freq = 64;
5734
5735         return 0;
5736
5737 free_rx_filter:
5738         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5739 free_mbox:
5740         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5741                           mbox_mem_alloc->dma);
5742         return status;
5743 }
5744
5745 static void be_remove(struct pci_dev *pdev)
5746 {
5747         struct be_adapter *adapter = pci_get_drvdata(pdev);
5748
5749         if (!adapter)
5750                 return;
5751
5752         be_roce_dev_remove(adapter);
5753         be_intr_set(adapter, false);
5754
5755         be_cancel_err_detection(adapter);
5756
5757         unregister_netdev(adapter->netdev);
5758
5759         be_clear(adapter);
5760
5761         if (!pci_vfs_assigned(adapter->pdev))
5762                 be_cmd_reset_function(adapter);
5763
5764         /* tell fw we're done with firing cmds */
5765         be_cmd_fw_clean(adapter);
5766
5767         be_unmap_pci_bars(adapter);
5768         be_drv_cleanup(adapter);
5769
5770         pci_disable_pcie_error_reporting(pdev);
5771
5772         pci_release_regions(pdev);
5773         pci_disable_device(pdev);
5774
5775         free_netdev(adapter->netdev);
5776 }
5777
5778 static ssize_t be_hwmon_show_temp(struct device *dev,
5779                                   struct device_attribute *dev_attr,
5780                                   char *buf)
5781 {
5782         struct be_adapter *adapter = dev_get_drvdata(dev);
5783
5784         /* Unit: millidegree Celsius */
5785         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5786                 return -EIO;
5787         else
5788                 return sprintf(buf, "%u\n",
5789                                adapter->hwmon_info.be_on_die_temp * 1000);
5790 }
5791
5792 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5793                           be_hwmon_show_temp, NULL, 1);
5794
5795 static struct attribute *be_hwmon_attrs[] = {
5796         &sensor_dev_attr_temp1_input.dev_attr.attr,
5797         NULL
5798 };
5799
5800 ATTRIBUTE_GROUPS(be_hwmon);
5801
5802 static char *mc_name(struct be_adapter *adapter)
5803 {
5804         char *str = ""; /* default */
5805
5806         switch (adapter->mc_type) {
5807         case UMC:
5808                 str = "UMC";
5809                 break;
5810         case FLEX10:
5811                 str = "FLEX10";
5812                 break;
5813         case vNIC1:
5814                 str = "vNIC-1";
5815                 break;
5816         case nPAR:
5817                 str = "nPAR";
5818                 break;
5819         case UFP:
5820                 str = "UFP";
5821                 break;
5822         case vNIC2:
5823                 str = "vNIC-2";
5824                 break;
5825         default:
5826                 str = "";
5827         }
5828
5829         return str;
5830 }
5831
5832 static inline char *func_name(struct be_adapter *adapter)
5833 {
5834         return be_physfn(adapter) ? "PF" : "VF";
5835 }
5836
5837 static inline char *nic_name(struct pci_dev *pdev)
5838 {
5839         switch (pdev->device) {
5840         case OC_DEVICE_ID1:
5841                 return OC_NAME;
5842         case OC_DEVICE_ID2:
5843                 return OC_NAME_BE;
5844         case OC_DEVICE_ID3:
5845         case OC_DEVICE_ID4:
5846                 return OC_NAME_LANCER;
5847         case BE_DEVICE_ID2:
5848                 return BE3_NAME;
5849         case OC_DEVICE_ID5:
5850         case OC_DEVICE_ID6:
5851                 return OC_NAME_SH;
5852         default:
5853                 return BE_NAME;
5854         }
5855 }
5856
5857 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5858 {
5859         struct be_adapter *adapter;
5860         struct net_device *netdev;
5861         int status = 0;
5862
5863         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5864
5865         status = pci_enable_device(pdev);
5866         if (status)
5867                 goto do_none;
5868
5869         status = pci_request_regions(pdev, DRV_NAME);
5870         if (status)
5871                 goto disable_dev;
5872         pci_set_master(pdev);
5873
5874         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5875         if (!netdev) {
5876                 status = -ENOMEM;
5877                 goto rel_reg;
5878         }
5879         adapter = netdev_priv(netdev);
5880         adapter->pdev = pdev;
5881         pci_set_drvdata(pdev, adapter);
5882         adapter->netdev = netdev;
5883         SET_NETDEV_DEV(netdev, &pdev->dev);
5884
5885         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5886         if (!status) {
5887                 netdev->features |= NETIF_F_HIGHDMA;
5888         } else {
5889                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5890                 if (status) {
5891                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5892                         goto free_netdev;
5893                 }
5894         }
5895
5896         status = pci_enable_pcie_error_reporting(pdev);
5897         if (!status)
5898                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5899
5900         status = be_map_pci_bars(adapter);
5901         if (status)
5902                 goto free_netdev;
5903
5904         status = be_drv_init(adapter);
5905         if (status)
5906                 goto unmap_bars;
5907
5908         status = be_setup(adapter);
5909         if (status)
5910                 goto drv_cleanup;
5911
5912         be_netdev_init(netdev);
5913         status = register_netdev(netdev);
5914         if (status != 0)
5915                 goto unsetup;
5916
5917         be_roce_dev_add(adapter);
5918
5919         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5920         adapter->error_recovery.probe_time = jiffies;
5921
5922         /* On Die temperature not supported for VF. */
5923         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5924                 adapter->hwmon_info.hwmon_dev =
5925                         devm_hwmon_device_register_with_groups(&pdev->dev,
5926                                                                DRV_NAME,
5927                                                                adapter,
5928                                                                be_hwmon_groups);
5929                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5930         }
5931
5932         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5933                  func_name(adapter), mc_name(adapter), adapter->port_name);
5934
5935         return 0;
5936
5937 unsetup:
5938         be_clear(adapter);
5939 drv_cleanup:
5940         be_drv_cleanup(adapter);
5941 unmap_bars:
5942         be_unmap_pci_bars(adapter);
5943 free_netdev:
5944         free_netdev(netdev);
5945 rel_reg:
5946         pci_release_regions(pdev);
5947 disable_dev:
5948         pci_disable_device(pdev);
5949 do_none:
5950         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5951         return status;
5952 }
5953
5954 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5955 {
5956         struct be_adapter *adapter = pci_get_drvdata(pdev);
5957
5958         be_intr_set(adapter, false);
5959         be_cancel_err_detection(adapter);
5960
5961         be_cleanup(adapter);
5962
5963         pci_save_state(pdev);
5964         pci_disable_device(pdev);
5965         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5966         return 0;
5967 }
5968
5969 static int be_pci_resume(struct pci_dev *pdev)
5970 {
5971         struct be_adapter *adapter = pci_get_drvdata(pdev);
5972         int status = 0;
5973
5974         status = pci_enable_device(pdev);
5975         if (status)
5976                 return status;
5977
5978         pci_restore_state(pdev);
5979
5980         status = be_resume(adapter);
5981         if (status)
5982                 return status;
5983
5984         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5985
5986         return 0;
5987 }
5988
5989 /*
5990  * An FLR will stop BE from DMAing any data.
5991  */
5992 static void be_shutdown(struct pci_dev *pdev)
5993 {
5994         struct be_adapter *adapter = pci_get_drvdata(pdev);
5995
5996         if (!adapter)
5997                 return;
5998
5999         be_roce_dev_shutdown(adapter);
6000         cancel_delayed_work_sync(&adapter->work);
6001         be_cancel_err_detection(adapter);
6002
6003         netif_device_detach(adapter->netdev);
6004
6005         be_cmd_reset_function(adapter);
6006
6007         pci_disable_device(pdev);
6008 }
6009
6010 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6011                                             pci_channel_state_t state)
6012 {
6013         struct be_adapter *adapter = pci_get_drvdata(pdev);
6014
6015         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6016
6017         be_roce_dev_remove(adapter);
6018
6019         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6020                 be_set_error(adapter, BE_ERROR_EEH);
6021
6022                 be_cancel_err_detection(adapter);
6023
6024                 be_cleanup(adapter);
6025         }
6026
6027         if (state == pci_channel_io_perm_failure)
6028                 return PCI_ERS_RESULT_DISCONNECT;
6029
6030         pci_disable_device(pdev);
6031
6032         /* The error could cause the FW to trigger a flash debug dump.
6033          * Resetting the card while flash dump is in progress
6034          * can cause it not to recover; wait for it to finish.
6035          * Wait only for first function as it is needed only once per
6036          * adapter.
6037          */
6038         if (pdev->devfn == 0)
6039                 ssleep(30);
6040
6041         return PCI_ERS_RESULT_NEED_RESET;
6042 }
6043
6044 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6045 {
6046         struct be_adapter *adapter = pci_get_drvdata(pdev);
6047         int status;
6048
6049         dev_info(&adapter->pdev->dev, "EEH reset\n");
6050
6051         status = pci_enable_device(pdev);
6052         if (status)
6053                 return PCI_ERS_RESULT_DISCONNECT;
6054
6055         pci_set_master(pdev);
6056         pci_restore_state(pdev);
6057
6058         /* Check if card is ok and fw is ready */
6059         dev_info(&adapter->pdev->dev,
6060                  "Waiting for FW to be ready after EEH reset\n");
6061         status = be_fw_wait_ready(adapter);
6062         if (status)
6063                 return PCI_ERS_RESULT_DISCONNECT;
6064
6065         pci_cleanup_aer_uncorrect_error_status(pdev);
6066         be_clear_error(adapter, BE_CLEAR_ALL);
6067         return PCI_ERS_RESULT_RECOVERED;
6068 }
6069
6070 static void be_eeh_resume(struct pci_dev *pdev)
6071 {
6072         int status = 0;
6073         struct be_adapter *adapter = pci_get_drvdata(pdev);
6074
6075         dev_info(&adapter->pdev->dev, "EEH resume\n");
6076
6077         pci_save_state(pdev);
6078
6079         status = be_resume(adapter);
6080         if (status)
6081                 goto err;
6082
6083         be_roce_dev_add(adapter);
6084
6085         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6086         return;
6087 err:
6088         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6089 }
6090
6091 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6092 {
6093         struct be_adapter *adapter = pci_get_drvdata(pdev);
6094         struct be_resources vft_res = {0};
6095         int status;
6096
6097         if (!num_vfs)
6098                 be_vf_clear(adapter);
6099
6100         adapter->num_vfs = num_vfs;
6101
6102         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6103                 dev_warn(&pdev->dev,
6104                          "Cannot disable VFs while they are assigned\n");
6105                 return -EBUSY;
6106         }
6107
6108         /* When the HW is in SRIOV capable configuration, the PF-pool resources
6109          * are equally distributed across the max-number of VFs. The user may
6110          * request only a subset of the max-vfs to be enabled.
6111          * Based on num_vfs, redistribute the resources across num_vfs so that
6112          * each VF will have access to more number of resources.
6113          * This facility is not available in BE3 FW.
6114          * Also, this is done by FW in Lancer chip.
6115          */
6116         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6117                 be_calculate_vf_res(adapter, adapter->num_vfs,
6118                                     &vft_res);
6119                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6120                                                  adapter->num_vfs, &vft_res);
6121                 if (status)
6122                         dev_err(&pdev->dev,
6123                                 "Failed to optimize SR-IOV resources\n");
6124         }
6125
6126         status = be_get_resources(adapter);
6127         if (status)
6128                 return be_cmd_status(status);
6129
6130         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6131         rtnl_lock();
6132         status = be_update_queues(adapter);
6133         rtnl_unlock();
6134         if (status)
6135                 return be_cmd_status(status);
6136
6137         if (adapter->num_vfs)
6138                 status = be_vf_setup(adapter);
6139
6140         if (!status)
6141                 return adapter->num_vfs;
6142
6143         return 0;
6144 }
6145
6146 static const struct pci_error_handlers be_eeh_handlers = {
6147         .error_detected = be_eeh_err_detected,
6148         .slot_reset = be_eeh_reset,
6149         .resume = be_eeh_resume,
6150 };
6151
6152 static struct pci_driver be_driver = {
6153         .name = DRV_NAME,
6154         .id_table = be_dev_ids,
6155         .probe = be_probe,
6156         .remove = be_remove,
6157         .suspend = be_suspend,
6158         .resume = be_pci_resume,
6159         .shutdown = be_shutdown,
6160         .sriov_configure = be_pci_sriov_configure,
6161         .err_handler = &be_eeh_handlers
6162 };
6163
6164 static int __init be_init_module(void)
6165 {
6166         int status;
6167
6168         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6169             rx_frag_size != 2048) {
6170                 printk(KERN_WARNING DRV_NAME
6171                         " : Module param rx_frag_size must be 2048/4096/8192."
6172                         " Using 2048\n");
6173                 rx_frag_size = 2048;
6174         }
6175
6176         if (num_vfs > 0) {
6177                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6178                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6179         }
6180
6181         be_wq = create_singlethread_workqueue("be_wq");
6182         if (!be_wq) {
6183                 pr_warn(DRV_NAME "workqueue creation failed\n");
6184                 return -1;
6185         }
6186
6187         be_err_recovery_workq =
6188                 create_singlethread_workqueue("be_err_recover");
6189         if (!be_err_recovery_workq)
6190                 pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6191
6192         status = pci_register_driver(&be_driver);
6193         if (status) {
6194                 destroy_workqueue(be_wq);
6195                 be_destroy_err_recovery_workq();
6196         }
6197         return status;
6198 }
6199 module_init(be_init_module);
6200
6201 static void __exit be_exit_module(void)
6202 {
6203         pci_unregister_driver(&be_driver);
6204
6205         be_destroy_err_recovery_workq();
6206
6207         if (be_wq)
6208                 destroy_workqueue(be_wq);
6209 }
6210 module_exit(be_exit_module);