drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2015 Emulex
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
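 * (e.g. echo <num-vfs> > /sys/bus/pci/devices/<bdf>/sriov_numvfs)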
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 static const struct pci_device_id be_dev_ids[] = {
45         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
49         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
50         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
51         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
52         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
53         { 0 }
54 };
55 MODULE_DEVICE_TABLE(pci, be_dev_ids);
56 /* UE Status Low CSR */
57 static const char * const ue_status_low_desc[] = {
58         "CEV",
59         "CTX",
60         "DBUF",
61         "ERX",
62         "Host",
63         "MPU",
64         "NDMA",
65         "PTC ",
66         "RDMA ",
67         "RXF ",
68         "RXIPS ",
69         "RXULP0 ",
70         "RXULP1 ",
71         "RXULP2 ",
72         "TIM ",
73         "TPOST ",
74         "TPRE ",
75         "TXIPS ",
76         "TXULP0 ",
77         "TXULP1 ",
78         "UC ",
79         "WDMA ",
80         "TXULP2 ",
81         "HOST1 ",
82         "P0_OB_LINK ",
83         "P1_OB_LINK ",
84         "HOST_GPIO ",
85         "MBOX ",
86         "ERX2 ",
87         "SPARE ",
88         "JTAG ",
89         "MPU_INTPEND "
90 };
91
92 /* UE Status High CSR */
93 static const char * const ue_status_hi_desc[] = {
94         "LPCMEMHOST",
95         "MGMT_MAC",
96         "PCS0ONLINE",
97         "MPU_IRAM",
98         "PCS1ONLINE",
99         "PCTL0",
100         "PCTL1",
101         "PMEM",
102         "RR",
103         "TXPB",
104         "RXPP",
105         "XAUI",
106         "TXP",
107         "ARM",
108         "IPC",
109         "HOST2",
110         "HOST3",
111         "HOST4",
112         "HOST5",
113         "HOST6",
114         "HOST7",
115         "ECRC",
116         "Poison TLP",
117         "NETC",
118         "PERIPH",
119         "LLTXULP",
120         "D2P",
121         "RCON",
122         "LDMA",
123         "LLTXP",
124         "LLTXPB",
125         "Unknown"
126 };
127
128 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
129                                  BE_IF_FLAGS_BROADCAST | \
130                                  BE_IF_FLAGS_MULTICAST | \
131                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
132
133 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
134 {
135         struct be_dma_mem *mem = &q->dma_mem;
136
137         if (mem->va) {
138                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
139                                   mem->dma);
140                 mem->va = NULL;
141         }
142 }
143
144 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
145                           u16 len, u16 entry_size)
146 {
147         struct be_dma_mem *mem = &q->dma_mem;
148
149         memset(q, 0, sizeof(*q));
150         q->len = len;
151         q->entry_size = entry_size;
152         mem->size = len * entry_size;
153         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
154                                       GFP_KERNEL);
155         if (!mem->va)
156                 return -ENOMEM;
157         return 0;
158 }
159
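/* Toggle the HOSTINTR bit in the PCI config-space MEMBAR control register.
 * Used by be_intr_set() as a fallback when the FW command to enable or
 * disable interrupts fails.
 */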
160 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
161 {
162         u32 reg, enabled;
163
164         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
165                               &reg);
166         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
167
168         if (!enabled && enable)
169                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
170         else if (enabled && !enable)
171                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
172         else
173                 return;
174
175         pci_write_config_dword(adapter->pdev,
176                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
177 }
178
179 static void be_intr_set(struct be_adapter *adapter, bool enable)
180 {
181         int status = 0;
182
183         /* On Lancer, interrupts can't be controlled via this register */
184         if (lancer_chip(adapter))
185                 return;
186
187         if (be_check_error(adapter, BE_ERROR_EEH))
188                 return;
189
190         status = be_cmd_intr_set(adapter, enable);
191         if (status)
192                 be_reg_intr_set(adapter, enable);
193 }
194
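/* Ring the RX queue doorbell to make 'posted' newly refilled buffers visible
 * to the HW; the wmb() orders the buffer writes before the doorbell write.
 */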
195 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
196 {
197         u32 val = 0;
198
199         if (be_check_error(adapter, BE_ERROR_HW))
200                 return;
201
202         val |= qid & DB_RQ_RING_ID_MASK;
203         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
204
205         wmb();
206         iowrite32(val, adapter->db + DB_RQ_OFFSET);
207 }
208
209 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
210                           u16 posted)
211 {
212         u32 val = 0;
213
214         if (be_check_error(adapter, BE_ERROR_HW))
215                 return;
216
217         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
218         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
219
220         wmb();
221         iowrite32(val, adapter->db + txo->db_offset);
222 }
223
224 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
225                          bool arm, bool clear_int, u16 num_popped,
226                          u32 eq_delay_mult_enc)
227 {
228         u32 val = 0;
229
230         val |= qid & DB_EQ_RING_ID_MASK;
231         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
232
233         if (be_check_error(adapter, BE_ERROR_HW))
234                 return;
235
236         if (arm)
237                 val |= 1 << DB_EQ_REARM_SHIFT;
238         if (clear_int)
239                 val |= 1 << DB_EQ_CLR_SHIFT;
240         val |= 1 << DB_EQ_EVNT_SHIFT;
241         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
242         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
243         iowrite32(val, adapter->db + DB_EQ_OFFSET);
244 }
245
246 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
247 {
248         u32 val = 0;
249
250         val |= qid & DB_CQ_RING_ID_MASK;
251         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
252                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
253
254         if (be_check_error(adapter, BE_ERROR_HW))
255                 return;
256
257         if (arm)
258                 val |= 1 << DB_CQ_REARM_SHIFT;
259         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
260         iowrite32(val, adapter->db + DB_CQ_OFFSET);
261 }
262
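/* ndo_set_mac_address handler: program the new MAC via PMAC_ADD and verify
 * with the FW that it is the active MAC before committing it to
 * netdev->dev_addr (see the privilege-related comments below).
 */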
263 static int be_mac_addr_set(struct net_device *netdev, void *p)
264 {
265         struct be_adapter *adapter = netdev_priv(netdev);
266         struct device *dev = &adapter->pdev->dev;
267         struct sockaddr *addr = p;
268         int status;
269         u8 mac[ETH_ALEN];
270         u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
271
272         if (!is_valid_ether_addr(addr->sa_data))
273                 return -EADDRNOTAVAIL;
274
275         /* Proceed further only if the user-provided MAC is different
276          * from the active MAC
277          */
278         if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
279                 return 0;
280
281         /* if device is not running, copy MAC to netdev->dev_addr */
282         if (!netif_running(netdev))
283                 goto done;
284
285         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
286          * privilege or if PF did not provision the new MAC address.
287          * On BE3, this cmd will always fail if the VF doesn't have the
288          * FILTMGMT privilege. This failure is OK only if the PF has
289          * programmed the MAC for the VF.
290          */
291         status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
292                                  adapter->if_handle, &adapter->pmac_id[0], 0);
293         if (!status) {
294                 curr_pmac_id = adapter->pmac_id[0];
295
296                 /* Delete the old programmed MAC. This call may fail if the
297                  * old MAC was already deleted by the PF driver.
298                  */
299                 if (adapter->pmac_id[0] != old_pmac_id)
300                         be_cmd_pmac_del(adapter, adapter->if_handle,
301                                         old_pmac_id, 0);
302         }
303
304         /* Decide whether the new MAC was successfully activated only
305          * after querying the FW
306          */
307         status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
308                                        adapter->if_handle, true, 0);
309         if (status)
310                 goto err;
311
312         /* The MAC change did not happen, either due to lack of privilege
313          * or because the PF didn't pre-provision the MAC.
314          */
315         if (!ether_addr_equal(addr->sa_data, mac)) {
316                 status = -EPERM;
317                 goto err;
318         }
319 done:
320         ether_addr_copy(netdev->dev_addr, addr->sa_data);
321         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
322         return 0;
323 err:
324         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
325         return status;
326 }
327
328 /* BE2 supports only v0 cmd */
329 static void *hw_stats_from_cmd(struct be_adapter *adapter)
330 {
331         if (BE2_chip(adapter)) {
332                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
333
334                 return &cmd->hw_stats;
335         } else if (BE3_chip(adapter)) {
336                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
337
338                 return &cmd->hw_stats;
339         } else {
340                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
341
342                 return &cmd->hw_stats;
343         }
344 }
345
346 /* BE2 supports only v0 cmd */
347 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
348 {
349         if (BE2_chip(adapter)) {
350                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
351
352                 return &hw_stats->erx;
353         } else if (BE3_chip(adapter)) {
354                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
355
356                 return &hw_stats->erx;
357         } else {
358                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
359
360                 return &hw_stats->erx;
361         }
362 }
363
364 static void populate_be_v0_stats(struct be_adapter *adapter)
365 {
366         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
367         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
368         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
369         struct be_port_rxf_stats_v0 *port_stats =
370                                         &rxf_stats->port[adapter->port_num];
371         struct be_drv_stats *drvs = &adapter->drv_stats;
372
373         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
374         drvs->rx_pause_frames = port_stats->rx_pause_frames;
375         drvs->rx_crc_errors = port_stats->rx_crc_errors;
376         drvs->rx_control_frames = port_stats->rx_control_frames;
377         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
378         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
379         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
380         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
381         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
382         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
383         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
384         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
385         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
386         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
387         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
388         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
389         drvs->rx_dropped_header_too_small =
390                 port_stats->rx_dropped_header_too_small;
391         drvs->rx_address_filtered =
392                                         port_stats->rx_address_filtered +
393                                         port_stats->rx_vlan_filtered;
394         drvs->rx_alignment_symbol_errors =
395                 port_stats->rx_alignment_symbol_errors;
396
397         drvs->tx_pauseframes = port_stats->tx_pauseframes;
398         drvs->tx_controlframes = port_stats->tx_controlframes;
399
400         if (adapter->port_num)
401                 drvs->jabber_events = rxf_stats->port1_jabber_events;
402         else
403                 drvs->jabber_events = rxf_stats->port0_jabber_events;
404         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
405         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
406         drvs->forwarded_packets = rxf_stats->forwarded_packets;
407         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
408         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
409         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
410         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
411 }
412
413 static void populate_be_v1_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v1 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
424         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
425         drvs->rx_pause_frames = port_stats->rx_pause_frames;
426         drvs->rx_crc_errors = port_stats->rx_crc_errors;
427         drvs->rx_control_frames = port_stats->rx_control_frames;
428         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
429         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
430         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
431         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
432         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
433         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
434         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
435         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
436         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
437         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_input_fifo_overflow_drop =
441                 port_stats->rx_input_fifo_overflow_drop;
442         drvs->rx_address_filtered = port_stats->rx_address_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
449         drvs->jabber_events = port_stats->jabber_events;
450         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
451         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
452         drvs->forwarded_packets = rxf_stats->forwarded_packets;
453         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
454         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
455         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
456         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
457 }
458
459 static void populate_be_v2_stats(struct be_adapter *adapter)
460 {
461         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
462         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
463         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
464         struct be_port_rxf_stats_v2 *port_stats =
465                                         &rxf_stats->port[adapter->port_num];
466         struct be_drv_stats *drvs = &adapter->drv_stats;
467
468         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
469         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
470         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
471         drvs->rx_pause_frames = port_stats->rx_pause_frames;
472         drvs->rx_crc_errors = port_stats->rx_crc_errors;
473         drvs->rx_control_frames = port_stats->rx_control_frames;
474         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
475         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
476         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
477         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
478         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
479         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
480         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
481         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
482         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
483         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
484         drvs->rx_dropped_header_too_small =
485                 port_stats->rx_dropped_header_too_small;
486         drvs->rx_input_fifo_overflow_drop =
487                 port_stats->rx_input_fifo_overflow_drop;
488         drvs->rx_address_filtered = port_stats->rx_address_filtered;
489         drvs->rx_alignment_symbol_errors =
490                 port_stats->rx_alignment_symbol_errors;
491         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
492         drvs->tx_pauseframes = port_stats->tx_pauseframes;
493         drvs->tx_controlframes = port_stats->tx_controlframes;
494         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
495         drvs->jabber_events = port_stats->jabber_events;
496         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
497         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
498         drvs->forwarded_packets = rxf_stats->forwarded_packets;
499         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
500         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
501         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
502         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
503         if (be_roce_supported(adapter)) {
504                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
505                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
506                 drvs->rx_roce_frames = port_stats->roce_frames_received;
507                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
508                 drvs->roce_drops_payload_len =
509                         port_stats->roce_drops_payload_len;
510         }
511 }
512
513 static void populate_lancer_stats(struct be_adapter *adapter)
514 {
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
517
518         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
519         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
520         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
521         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
522         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
523         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
524         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
525         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
526         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
527         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
528         drvs->rx_dropped_tcp_length =
529                                 pport_stats->rx_dropped_invalid_tcp_length;
530         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
533         drvs->rx_dropped_header_too_small =
534                                 pport_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
536         drvs->rx_address_filtered =
537                                         pport_stats->rx_address_filtered +
538                                         pport_stats->rx_vlan_filtered;
539         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
540         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
541         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
542         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
543         drvs->jabber_events = pport_stats->rx_jabbers;
544         drvs->forwarded_packets = pport_stats->num_forwards_lo;
545         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
546         drvs->rx_drops_too_many_frags =
547                                 pport_stats->rx_drops_too_many_frags_lo;
548 }
549
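/* Accumulate a 16-bit HW counter that wraps at 65535 into a 32-bit SW
 * accumulator; a wrap is detected when the new sample is smaller than the
 * low 16 bits of the accumulator.
 */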
550 static void accumulate_16bit_val(u32 *acc, u16 val)
551 {
552 #define lo(x)                   (x & 0xFFFF)
553 #define hi(x)                   (x & 0xFFFF0000)
554         bool wrapped = val < lo(*acc);
555         u32 newacc = hi(*acc) + val;
556
557         if (wrapped)
558                 newacc += 65536;
559         ACCESS_ONCE(*acc) = newacc;
560 }
561
562 static void populate_erx_stats(struct be_adapter *adapter,
563                                struct be_rx_obj *rxo, u32 erx_stat)
564 {
565         if (!BEx_chip(adapter))
566                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
567         else
568                 /* the erx HW counter below can actually wrap around after
569                  * 65535; the driver accumulates it into a 32-bit value
570                  */
571                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
572                                      (u16)erx_stat);
573 }
574
575 void be_parse_stats(struct be_adapter *adapter)
576 {
577         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
578         struct be_rx_obj *rxo;
579         int i;
580         u32 erx_stat;
581
582         if (lancer_chip(adapter)) {
583                 populate_lancer_stats(adapter);
584         } else {
585                 if (BE2_chip(adapter))
586                         populate_be_v0_stats(adapter);
587                 else if (BE3_chip(adapter))
588                         /* for BE3 */
589                         populate_be_v1_stats(adapter);
590                 else
591                         populate_be_v2_stats(adapter);
592
593                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
594                 for_all_rx_queues(adapter, rxo, i) {
595                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
596                         populate_erx_stats(adapter, rxo, erx_stat);
597                 }
598         }
599 }
600
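/* ndo_get_stats64 handler: sum the per-queue SW counters (sampled under the
 * u64_stats seqcount) and fold the FW-reported error counters from drv_stats
 * into the rtnl_link_stats64 fields.
 */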
601 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
602                                                 struct rtnl_link_stats64 *stats)
603 {
604         struct be_adapter *adapter = netdev_priv(netdev);
605         struct be_drv_stats *drvs = &adapter->drv_stats;
606         struct be_rx_obj *rxo;
607         struct be_tx_obj *txo;
608         u64 pkts, bytes;
609         unsigned int start;
610         int i;
611
612         for_all_rx_queues(adapter, rxo, i) {
613                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
614
615                 do {
616                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
617                         pkts = rx_stats(rxo)->rx_pkts;
618                         bytes = rx_stats(rxo)->rx_bytes;
619                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
620                 stats->rx_packets += pkts;
621                 stats->rx_bytes += bytes;
622                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
623                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
624                                         rx_stats(rxo)->rx_drops_no_frags;
625         }
626
627         for_all_tx_queues(adapter, txo, i) {
628                 const struct be_tx_stats *tx_stats = tx_stats(txo);
629
630                 do {
631                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
632                         pkts = tx_stats(txo)->tx_pkts;
633                         bytes = tx_stats(txo)->tx_bytes;
634                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
635                 stats->tx_packets += pkts;
636                 stats->tx_bytes += bytes;
637         }
638
639         /* bad pkts received */
640         stats->rx_errors = drvs->rx_crc_errors +
641                 drvs->rx_alignment_symbol_errors +
642                 drvs->rx_in_range_errors +
643                 drvs->rx_out_range_errors +
644                 drvs->rx_frame_too_long +
645                 drvs->rx_dropped_too_small +
646                 drvs->rx_dropped_too_short +
647                 drvs->rx_dropped_header_too_small +
648                 drvs->rx_dropped_tcp_length +
649                 drvs->rx_dropped_runt;
650
651         /* detailed rx errors */
652         stats->rx_length_errors = drvs->rx_in_range_errors +
653                 drvs->rx_out_range_errors +
654                 drvs->rx_frame_too_long;
655
656         stats->rx_crc_errors = drvs->rx_crc_errors;
657
658         /* frame alignment errors */
659         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
660
661         /* receiver fifo overrun */
662         /* drops_no_pbuf is not per i/f, it's per BE card */
663         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
664                                 drvs->rx_input_fifo_overflow_drop +
665                                 drvs->rx_drops_no_pbuf;
666         return stats;
667 }
668
669 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
670 {
671         struct net_device *netdev = adapter->netdev;
672
673         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
674                 netif_carrier_off(netdev);
675                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
676         }
677
678         if (link_status)
679                 netif_carrier_on(netdev);
680         else
681                 netif_carrier_off(netdev);
682
683         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
684 }
685
686 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
687 {
688         struct be_tx_stats *stats = tx_stats(txo);
689         u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
690
691         u64_stats_update_begin(&stats->sync);
692         stats->tx_reqs++;
693         stats->tx_bytes += skb->len;
694         stats->tx_pkts += tx_pkts;
695         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
696                 stats->tx_vxlan_offload_pkts += tx_pkts;
697         u64_stats_update_end(&stats->sync);
698 }
699
700 /* Returns number of WRBs needed for the skb */
701 static u32 skb_wrb_cnt(struct sk_buff *skb)
702 {
703         /* +1 for the header wrb */
704         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
705 }
706
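/* Fill a TX fragment WRB with the DMA address and length of one buffer,
 * converted to the little-endian format expected by the HW.
 */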
707 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
708 {
709         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
710         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
711         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
712         wrb->rsvd0 = 0;
713 }
714
715 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
716  * to avoid the swap and shift/mask operations in wrb_fill().
717  */
718 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
719 {
720         wrb->frag_pa_hi = 0;
721         wrb->frag_pa_lo = 0;
722         wrb->frag_len = 0;
723         wrb->rsvd0 = 0;
724 }
725
726 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
727                                      struct sk_buff *skb)
728 {
729         u8 vlan_prio;
730         u16 vlan_tag;
731
732         vlan_tag = skb_vlan_tag_get(skb);
733         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
734         /* If vlan priority provided by OS is NOT in available bmap */
735         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
736                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
737                                 adapter->recommended_prio_bits;
738
739         return vlan_tag;
740 }
741
742 /* Used only for IP tunnel packets */
743 static u16 skb_inner_ip_proto(struct sk_buff *skb)
744 {
745         return (inner_ip_hdr(skb)->version == 4) ?
746                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
747 }
748
749 static u16 skb_ip_proto(struct sk_buff *skb)
750 {
751         return (ip_hdr(skb)->version == 4) ?
752                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
753 }
754
755 static inline bool be_is_txq_full(struct be_tx_obj *txo)
756 {
757         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
758 }
759
760 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
761 {
762         return atomic_read(&txo->q.used) < txo->q.len / 2;
763 }
764
765 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
766 {
767         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
768 }
769
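/* Translate the skb's offload requests (GSO/LSO, IP/TCP/UDP checksum, VLAN)
 * into the WRB feature flags that wrb_fill_hdr() encodes into the header WRB.
 */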
770 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
771                                        struct sk_buff *skb,
772                                        struct be_wrb_params *wrb_params)
773 {
774         u16 proto;
775
776         if (skb_is_gso(skb)) {
777                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
778                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
779                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
780                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
781         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
782                 if (skb->encapsulation) {
783                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
784                         proto = skb_inner_ip_proto(skb);
785                 } else {
786                         proto = skb_ip_proto(skb);
787                 }
788                 if (proto == IPPROTO_TCP)
789                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
790                 else if (proto == IPPROTO_UDP)
791                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
792         }
793
794         if (skb_vlan_tag_present(skb)) {
795                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
796                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
797         }
798
799         BE_WRB_F_SET(wrb_params->features, CRC, 1);
800 }
801
802 static void wrb_fill_hdr(struct be_adapter *adapter,
803                          struct be_eth_hdr_wrb *hdr,
804                          struct be_wrb_params *wrb_params,
805                          struct sk_buff *skb)
806 {
807         memset(hdr, 0, sizeof(*hdr));
808
809         SET_TX_WRB_HDR_BITS(crc, hdr,
810                             BE_WRB_F_GET(wrb_params->features, CRC));
811         SET_TX_WRB_HDR_BITS(ipcs, hdr,
812                             BE_WRB_F_GET(wrb_params->features, IPCS));
813         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
814                             BE_WRB_F_GET(wrb_params->features, TCPCS));
815         SET_TX_WRB_HDR_BITS(udpcs, hdr,
816                             BE_WRB_F_GET(wrb_params->features, UDPCS));
817
818         SET_TX_WRB_HDR_BITS(lso, hdr,
819                             BE_WRB_F_GET(wrb_params->features, LSO));
820         SET_TX_WRB_HDR_BITS(lso6, hdr,
821                             BE_WRB_F_GET(wrb_params->features, LSO6));
822         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
823
824         /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
825          * hack is not needed, the evt bit is set while ringing the DB.
826          */
827         SET_TX_WRB_HDR_BITS(event, hdr,
828                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
829         SET_TX_WRB_HDR_BITS(vlan, hdr,
830                             BE_WRB_F_GET(wrb_params->features, VLAN));
831         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
832
833         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
834         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
835         SET_TX_WRB_HDR_BITS(mgmt, hdr,
836                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
837 }
838
839 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
840                           bool unmap_single)
841 {
842         dma_addr_t dma;
843         u32 frag_len = le32_to_cpu(wrb->frag_len);
844
845
846         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
847                 (u64)le32_to_cpu(wrb->frag_pa_lo);
848         if (frag_len) {
849                 if (unmap_single)
850                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
851                 else
852                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
853         }
854 }
855
856 /* Grab a WRB header for xmit */
857 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
858 {
859         u32 head = txo->q.head;
860
861         queue_head_inc(&txo->q);
862         return head;
863 }
864
865 /* Set up the WRB header for xmit */
866 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
867                                 struct be_tx_obj *txo,
868                                 struct be_wrb_params *wrb_params,
869                                 struct sk_buff *skb, u16 head)
870 {
871         u32 num_frags = skb_wrb_cnt(skb);
872         struct be_queue_info *txq = &txo->q;
873         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
874
875         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
876         be_dws_cpu_to_le(hdr, sizeof(*hdr));
877
878         BUG_ON(txo->sent_skb_list[head]);
879         txo->sent_skb_list[head] = skb;
880         txo->last_req_hdr = head;
881         atomic_add(num_frags, &txq->used);
882         txo->last_req_wrb_cnt = num_frags;
883         txo->pend_wrb_cnt += num_frags;
884 }
885
886 /* Setup a WRB fragment (buffer descriptor) for xmit */
887 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
888                                  int len)
889 {
890         struct be_eth_wrb *wrb;
891         struct be_queue_info *txq = &txo->q;
892
893         wrb = queue_head_node(txq);
894         wrb_fill(wrb, busaddr, len);
895         queue_head_inc(txq);
896 }
897
898 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
899  * was invoked. The producer index is restored to the previous packet and the
900  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
901  */
902 static void be_xmit_restore(struct be_adapter *adapter,
903                             struct be_tx_obj *txo, u32 head, bool map_single,
904                             u32 copied)
905 {
906         struct device *dev;
907         struct be_eth_wrb *wrb;
908         struct be_queue_info *txq = &txo->q;
909
910         dev = &adapter->pdev->dev;
911         txq->head = head;
912
913         /* skip the first wrb (hdr); it's not mapped */
914         queue_head_inc(txq);
915         while (copied) {
916                 wrb = queue_head_node(txq);
917                 unmap_tx_frag(dev, wrb, map_single);
918                 map_single = false;
919                 copied -= le32_to_cpu(wrb->frag_len);
920                 queue_head_inc(txq);
921         }
922
923         txq->head = head;
924 }
925
926 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
927  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
928  * of WRBs used up by the packet.
929  */
930 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
931                            struct sk_buff *skb,
932                            struct be_wrb_params *wrb_params)
933 {
934         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
935         struct device *dev = &adapter->pdev->dev;
936         struct be_queue_info *txq = &txo->q;
937         bool map_single = false;
938         u32 head = txq->head;
939         dma_addr_t busaddr;
940         int len;
941
942         head = be_tx_get_wrb_hdr(txo);
943
944         if (skb->len > skb->data_len) {
945                 len = skb_headlen(skb);
946
947                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
948                 if (dma_mapping_error(dev, busaddr))
949                         goto dma_err;
950                 map_single = true;
951                 be_tx_setup_wrb_frag(txo, busaddr, len);
952                 copied += len;
953         }
954
955         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
956                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
957                 len = skb_frag_size(frag);
958
959                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
960                 if (dma_mapping_error(dev, busaddr))
961                         goto dma_err;
962                 be_tx_setup_wrb_frag(txo, busaddr, len);
963                 copied += len;
964         }
965
966         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
967
968         be_tx_stats_update(txo, skb);
969         return wrb_cnt;
970
971 dma_err:
972         adapter->drv_stats.dma_map_errors++;
973         be_xmit_restore(adapter, txo, head, map_single, copied);
974         return 0;
975 }
976
977 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
978 {
979         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
980 }
981
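/* Insert the VLAN tag (and the outer QnQ tag, if configured) into the packet
 * data in software; used when HW VLAN insertion must be skipped, and may set
 * VLAN_SKIP_HW in the WRB params.
 */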
982 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
983                                              struct sk_buff *skb,
984                                              struct be_wrb_params
985                                              *wrb_params)
986 {
987         u16 vlan_tag = 0;
988
989         skb = skb_share_check(skb, GFP_ATOMIC);
990         if (unlikely(!skb))
991                 return skb;
992
993         if (skb_vlan_tag_present(skb))
994                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
995
996         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
997                 if (!vlan_tag)
998                         vlan_tag = adapter->pvid;
999                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
1000                  * skip VLAN insertion
1001                  */
1002                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1003         }
1004
1005         if (vlan_tag) {
1006                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1007                                                 vlan_tag);
1008                 if (unlikely(!skb))
1009                         return skb;
1010                 skb->vlan_tci = 0;
1011         }
1012
1013         /* Insert the outer VLAN, if any */
1014         if (adapter->qnq_vid) {
1015                 vlan_tag = adapter->qnq_vid;
1016                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1017                                                 vlan_tag);
1018                 if (unlikely(!skb))
1019                         return skb;
1020                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1021         }
1022
1023         return skb;
1024 }
1025
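/* Check for IPv6 packets carrying the offending extension-header pattern
 * (hdrlen == 0xff); on BE3 such packets can trigger the HW VLAN-tagging
 * lockup handled in be_lancer_xmit_workarounds().
 */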
1026 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1027 {
1028         struct ethhdr *eh = (struct ethhdr *)skb->data;
1029         u16 offset = ETH_HLEN;
1030
1031         if (eh->h_proto == htons(ETH_P_IPV6)) {
1032                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1033
1034                 offset += sizeof(struct ipv6hdr);
1035                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1036                     ip6h->nexthdr != NEXTHDR_UDP) {
1037                         struct ipv6_opt_hdr *ehdr =
1038                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1039
1040                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1041                         if (ehdr->hdrlen == 0xff)
1042                                 return true;
1043                 }
1044         }
1045         return false;
1046 }
1047
1048 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1049 {
1050         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1051 }
1052
1053 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1054 {
1055         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1056 }
1057
1058 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1059                                                   struct sk_buff *skb,
1060                                                   struct be_wrb_params
1061                                                   *wrb_params)
1062 {
1063         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1064         unsigned int eth_hdr_len;
1065         struct iphdr *ip;
1066
1067         /* For padded packets, BE HW modifies tot_len field in IP header
1068          * incorrectly when VLAN tag is inserted by HW.
1069          * For padded packets, Lancer computes incorrect checksum.
1070          */
1071         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1072                                                 VLAN_ETH_HLEN : ETH_HLEN;
1073         if (skb->len <= 60 &&
1074             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1075             is_ipv4_pkt(skb)) {
1076                 ip = (struct iphdr *)ip_hdr(skb);
1077                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1078         }
1079
1080         /* If vlan tag is already inlined in the packet, skip HW VLAN
1081          * tagging in pvid-tagging mode
1082          */
1083         if (be_pvid_tagging_enabled(adapter) &&
1084             veh->h_vlan_proto == htons(ETH_P_8021Q))
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086
1087         /* HW has a bug wherein it will calculate CSUM for VLAN
1088          * pkts even though checksum offload is disabled.
1089          * Manually insert the VLAN in the pkt.
1090          */
1091         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1092             skb_vlan_tag_present(skb)) {
1093                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1094                 if (unlikely(!skb))
1095                         goto err;
1096         }
1097
1098         /* HW may lock up when VLAN HW tagging is requested on
1099          * certain ipv6 packets. Drop such pkts if the HW workaround to
1100          * skip HW tagging is not enabled by FW.
1101          */
1102         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1103                      (adapter->pvid || adapter->qnq_vid) &&
1104                      !qnq_async_evt_rcvd(adapter)))
1105                 goto tx_drop;
1106
1107         /* Manual VLAN tag insertion to prevent ASIC lockup when the
1108          * ASIC inserts a VLAN tag into
1109          * certain ipv6 packets. Insert VLAN tags in driver,
1110          * and set event, completion, vlan bits accordingly
1111          * in the Tx WRB.
1112          */
1113         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1114             be_vlan_tag_tx_chk(adapter, skb)) {
1115                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1116                 if (unlikely(!skb))
1117                         goto err;
1118         }
1119
1120         return skb;
1121 tx_drop:
1122         dev_kfree_skb_any(skb);
1123 err:
1124         return NULL;
1125 }
1126
1127 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1128                                            struct sk_buff *skb,
1129                                            struct be_wrb_params *wrb_params)
1130 {
1131         int err;
1132
1133         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1134          * packets that are 32 bytes or less may cause a transmit stall
1135          * on that port. The workaround is to pad such packets
1136          * (len <= 32 bytes) to a minimum length of 36 bytes.
1137          */
1138         if (skb->len <= 32) {
1139                 if (skb_put_padto(skb, 36))
1140                         return NULL;
1141         }
1142
1143         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1144                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1145                 if (!skb)
1146                         return NULL;
1147         }
1148
1149         /* The stack can send us skbs with length greater than
1150          * what the HW can handle. Trim the extra bytes.
1151          */
1152         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1153         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1154         WARN_ON(err);
1155
1156         return skb;
1157 }
1158
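/* Notify the HW of all pending WRBs: mark the last header WRB eventable,
 * pad with a dummy WRB on non-Lancer chips if the pending count is odd,
 * then ring the TX doorbell.
 */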
1159 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1160 {
1161         struct be_queue_info *txq = &txo->q;
1162         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1163
1164         /* Mark the last request eventable if it hasn't been marked already */
1165         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1166                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1167
1168         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1169         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1170                 wrb_fill_dummy(queue_head_node(txq));
1171                 queue_head_inc(txq);
1172                 atomic_inc(&txq->used);
1173                 txo->pend_wrb_cnt++;
1174                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1175                                            TX_HDR_WRB_NUM_SHIFT);
1176                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1177                                           TX_HDR_WRB_NUM_SHIFT);
1178         }
1179         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1180         txo->pend_wrb_cnt = 0;
1181 }
1182
1183 /* OS2BMC related */
1184
1185 #define DHCP_CLIENT_PORT        68
1186 #define DHCP_SERVER_PORT        67
1187 #define NET_BIOS_PORT1          137
1188 #define NET_BIOS_PORT2          138
1189 #define DHCPV6_RAS_PORT         547
1190
1191 #define is_mc_allowed_on_bmc(adapter, eh)       \
1192         (!is_multicast_filt_enabled(adapter) && \
1193          is_multicast_ether_addr(eh->h_dest) && \
1194          !is_broadcast_ether_addr(eh->h_dest))
1195
1196 #define is_bc_allowed_on_bmc(adapter, eh)       \
1197         (!is_broadcast_filt_enabled(adapter) && \
1198          is_broadcast_ether_addr(eh->h_dest))
1199
1200 #define is_arp_allowed_on_bmc(adapter, skb)     \
1201         (is_arp(skb) && is_arp_filt_enabled(adapter))
1202
1203 #define is_broadcast_packet(eh, adapter)        \
1204                 (is_multicast_ether_addr(eh->h_dest) && \
1205                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1206
1207 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1208
1209 #define is_arp_filt_enabled(adapter)    \
1210                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1211
1212 #define is_dhcp_client_filt_enabled(adapter)    \
1213                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1214
1215 #define is_dhcp_srvr_filt_enabled(adapter)      \
1216                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1217
1218 #define is_nbios_filt_enabled(adapter)  \
1219                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1220
1221 #define is_ipv6_na_filt_enabled(adapter)        \
1222                 (adapter->bmc_filt_mask &       \
1223                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1224
1225 #define is_ipv6_ra_filt_enabled(adapter)        \
1226                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1227
1228 #define is_ipv6_ras_filt_enabled(adapter)       \
1229                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1230
1231 #define is_broadcast_filt_enabled(adapter)      \
1232                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1233
1234 #define is_multicast_filt_enabled(adapter)      \
1235                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1236
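/* OS2BMC: decide whether a copy of this TX packet must also be sent to the
 * BMC, based on the packet type and the filters in adapter->bmc_filt_mask.
 * If so, any VLAN tag is first inserted inline into the packet.
 */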
1237 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1238                                struct sk_buff **skb)
1239 {
1240         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1241         bool os2bmc = false;
1242
1243         if (!be_is_os2bmc_enabled(adapter))
1244                 goto done;
1245
1246         if (!is_multicast_ether_addr(eh->h_dest))
1247                 goto done;
1248
1249         if (is_mc_allowed_on_bmc(adapter, eh) ||
1250             is_bc_allowed_on_bmc(adapter, eh) ||
1251             is_arp_allowed_on_bmc(adapter, (*skb))) {
1252                 os2bmc = true;
1253                 goto done;
1254         }
1255
1256         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1257                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1258                 u8 nexthdr = hdr->nexthdr;
1259
1260                 if (nexthdr == IPPROTO_ICMPV6) {
1261                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1262
1263                         switch (icmp6->icmp6_type) {
1264                         case NDISC_ROUTER_ADVERTISEMENT:
1265                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1266                                 goto done;
1267                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1268                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1269                                 goto done;
1270                         default:
1271                                 break;
1272                         }
1273                 }
1274         }
1275
1276         if (is_udp_pkt((*skb))) {
1277                 struct udphdr *udp = udp_hdr((*skb));
1278
1279                 switch (ntohs(udp->dest)) {
1280                 case DHCP_CLIENT_PORT:
1281                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1282                         goto done;
1283                 case DHCP_SERVER_PORT:
1284                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1285                         goto done;
1286                 case NET_BIOS_PORT1:
1287                 case NET_BIOS_PORT2:
1288                         os2bmc = is_nbios_filt_enabled(adapter);
1289                         goto done;
1290                 case DHCPV6_RAS_PORT:
1291                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1292                         goto done;
1293                 default:
1294                         break;
1295                 }
1296         }
1297 done:
1298         /* For VLAN packets destined to the BMC, the ASIC expects
1299          * the VLAN tag to be inline in the packet.
1300          */
1301         if (os2bmc)
1302                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1303
1304         return os2bmc;
1305 }
1306
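/* ndo_start_xmit handler: apply chip-specific workarounds, build the WRB
 * params, enqueue the packet (plus a second copy with the mgmt bit set if it
 * must also go to the BMC), stop the subqueue when the TX ring is nearly
 * full, and ring the doorbell when xmit_more indicates no more packets.
 */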
1307 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1308 {
1309         struct be_adapter *adapter = netdev_priv(netdev);
1310         u16 q_idx = skb_get_queue_mapping(skb);
1311         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1312         struct be_wrb_params wrb_params = { 0 };
1313         bool flush = !skb->xmit_more;
1314         u16 wrb_cnt;
1315
1316         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1317         if (unlikely(!skb))
1318                 goto drop;
1319
1320         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1321
1322         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1323         if (unlikely(!wrb_cnt)) {
1324                 dev_kfree_skb_any(skb);
1325                 goto drop;
1326         }
1327
1328         /* if os2bmc is enabled and if the pkt is destined to bmc,
1329          * enqueue the pkt a 2nd time with mgmt bit set.
1330          */
1331         if (be_send_pkt_to_bmc(adapter, &skb)) {
1332                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1333                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1334                 if (unlikely(!wrb_cnt))
1335                         goto drop;
1336                 else
1337                         skb_get(skb);
1338         }
1339
1340         if (be_is_txq_full(txo)) {
1341                 netif_stop_subqueue(netdev, q_idx);
1342                 tx_stats(txo)->tx_stops++;
1343         }
1344
1345         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1346                 be_xmit_flush(adapter, txo);
1347
1348         return NETDEV_TX_OK;
1349 drop:
1350         tx_stats(txo)->tx_drv_drops++;
1351         /* Flush the already enqueued tx requests */
1352         if (flush && txo->pend_wrb_cnt)
1353                 be_xmit_flush(adapter, txo);
1354
1355         return NETDEV_TX_OK;
1356 }
1357
1358 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1359 {
1360         struct be_adapter *adapter = netdev_priv(netdev);
1361         struct device *dev = &adapter->pdev->dev;
1362
1363         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1364                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1365                          BE_MIN_MTU, BE_MAX_MTU);
1366                 return -EINVAL;
1367         }
1368
1369         dev_info(dev, "MTU changed from %d to %d bytes\n",
1370                  netdev->mtu, new_mtu);
1371         netdev->mtu = new_mtu;
1372         return 0;
1373 }
1374
1375 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1376 {
1377         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1378                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1379 }
1380
1381 static int be_set_vlan_promisc(struct be_adapter *adapter)
1382 {
1383         struct device *dev = &adapter->pdev->dev;
1384         int status;
1385
1386         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1387                 return 0;
1388
1389         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1390         if (!status) {
1391                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1392                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1393         } else {
1394                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1395         }
1396         return status;
1397 }
1398
1399 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1400 {
1401         struct device *dev = &adapter->pdev->dev;
1402         int status;
1403
1404         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1405         if (!status) {
1406                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1407                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1408         }
1409         return status;
1410 }
1411
1412 /*
1413  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1414  * If the user configures more, place BE in vlan promiscuous mode.
1415  */
1416 static int be_vid_config(struct be_adapter *adapter)
1417 {
1418         struct device *dev = &adapter->pdev->dev;
1419         u16 vids[BE_NUM_VLANS_SUPPORTED];
1420         u16 num = 0, i = 0;
1421         int status = 0;
1422
1423         /* No need to further configure vids if in promiscuous mode */
1424         if (be_in_all_promisc(adapter))
1425                 return 0;
1426
1427         if (adapter->vlans_added > be_max_vlans(adapter))
1428                 return be_set_vlan_promisc(adapter);
1429
1430         /* Construct VLAN Table to give to HW */
1431         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1432                 vids[num++] = cpu_to_le16(i);
1433
1434         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1435         if (status) {
1436                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1437                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1438                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1439                     addl_status(status) ==
1440                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1441                         return be_set_vlan_promisc(adapter);
1442         } else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1443                 status = be_clear_vlan_promisc(adapter);
1444         }
1445         return status;
1446 }
1447
1448 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1449 {
1450         struct be_adapter *adapter = netdev_priv(netdev);
1451         int status = 0;
1452
1453         /* Packets with VID 0 are always received by Lancer by default */
1454         if (lancer_chip(adapter) && vid == 0)
1455                 return status;
1456
1457         if (test_bit(vid, adapter->vids))
1458                 return status;
1459
1460         set_bit(vid, adapter->vids);
1461         adapter->vlans_added++;
1462
1463         status = be_vid_config(adapter);
1464         if (status) {
1465                 adapter->vlans_added--;
1466                 clear_bit(vid, adapter->vids);
1467         }
1468
1469         return status;
1470 }
1471
1472 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1473 {
1474         struct be_adapter *adapter = netdev_priv(netdev);
1475
1476         /* Packets with VID 0 are always received by Lancer by default */
1477         if (lancer_chip(adapter) && vid == 0)
1478                 return 0;
1479
1480         if (!test_bit(vid, adapter->vids))
1481                 return 0;
1482
1483         clear_bit(vid, adapter->vids);
1484         adapter->vlans_added--;
1485
1486         return be_vid_config(adapter);
1487 }
1488
1489 static void be_clear_all_promisc(struct be_adapter *adapter)
1490 {
1491         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF);
1492         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
1493 }
1494
1495 static void be_set_all_promisc(struct be_adapter *adapter)
1496 {
1497         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1498         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1499 }
1500
1501 static void be_set_mc_promisc(struct be_adapter *adapter)
1502 {
1503         int status;
1504
1505         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1506                 return;
1507
1508         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1509         if (!status)
1510                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1511 }
1512
1513 static void be_set_mc_list(struct be_adapter *adapter)
1514 {
1515         int status;
1516
1517         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1518         if (!status)
1519                 adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1520         else
1521                 be_set_mc_promisc(adapter);
1522 }
1523
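/* Re-program the unicast MAC filter list: delete the previously added
 * entries (slot 0 is reserved for the primary MAC) and re-add the current
 * netdev UC list, falling back to promiscuous mode when the list exceeds
 * the number of UC MACs the interface supports.
 */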
1524 static void be_set_uc_list(struct be_adapter *adapter)
1525 {
1526         struct netdev_hw_addr *ha;
1527         int i = 1; /* First slot is claimed by the Primary MAC */
1528
1529         for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
1530                 be_cmd_pmac_del(adapter, adapter->if_handle,
1531                                 adapter->pmac_id[i], 0);
1532
1533         if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) {
1534                 be_set_all_promisc(adapter);
1535                 return;
1536         }
1537
1538         netdev_for_each_uc_addr(ha, adapter->netdev) {
1539                 adapter->uc_macs++; /* First slot is for Primary MAC */
1540                 be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle,
1541                                 &adapter->pmac_id[adapter->uc_macs], 0);
1542         }
1543 }
1544
1545 static void be_clear_uc_list(struct be_adapter *adapter)
1546 {
1547         int i;
1548
1549         for (i = 1; i < (adapter->uc_macs + 1); i++)
1550                 be_cmd_pmac_del(adapter, adapter->if_handle,
1551                                 adapter->pmac_id[i], 0);
1552         adapter->uc_macs = 0;
1553 }
1554
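/* ndo_set_rx_mode handler: moves the interface between promiscuous,
 * multicast-promiscuous and filtered modes based on the netdev flags and
 * the number of configured multicast/unicast addresses.
 */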
1555 static void be_set_rx_mode(struct net_device *netdev)
1556 {
1557         struct be_adapter *adapter = netdev_priv(netdev);
1558
1559         if (netdev->flags & IFF_PROMISC) {
1560                 be_set_all_promisc(adapter);
1561                 return;
1562         }
1563
1564         /* Interface was previously in promiscuous mode; disable it */
1565         if (be_in_all_promisc(adapter)) {
1566                 be_clear_all_promisc(adapter);
1567                 if (adapter->vlans_added)
1568                         be_vid_config(adapter);
1569         }
1570
1571         /* Enable multicast promisc if num configured exceeds what we support */
1572         if (netdev->flags & IFF_ALLMULTI ||
1573             netdev_mc_count(netdev) > be_max_mc(adapter)) {
1574                 be_set_mc_promisc(adapter);
1575                 return;
1576         }
1577
1578         if (netdev_uc_count(netdev) != adapter->uc_macs)
1579                 be_set_uc_list(adapter);
1580
1581         be_set_mc_list(adapter);
1582 }
1583
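/* ndo_set_vf_mac handler: programs a new MAC for the given VF. On BEx chips
 * this is done by deleting and re-adding a pmac entry on the VF's interface;
 * on other chips a single SET_MAC command is used. A request that matches
 * the currently active MAC is a no-op.
 */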
1584 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1585 {
1586         struct be_adapter *adapter = netdev_priv(netdev);
1587         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1588         int status;
1589
1590         if (!sriov_enabled(adapter))
1591                 return -EPERM;
1592
1593         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1594                 return -EINVAL;
1595
1596         /* Proceed further only if user provided MAC is different
1597          * from active MAC
1598          */
1599         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1600                 return 0;
1601
1602         if (BEx_chip(adapter)) {
1603                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1604                                 vf + 1);
1605
1606                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1607                                          &vf_cfg->pmac_id, vf + 1);
1608         } else {
1609                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1610                                         vf + 1);
1611         }
1612
1613         if (status) {
1614                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1615                         mac, vf, status);
1616                 return be_cmd_status(status);
1617         }
1618
1619         ether_addr_copy(vf_cfg->mac_addr, mac);
1620
1621         return 0;
1622 }
1623
1624 static int be_get_vf_config(struct net_device *netdev, int vf,
1625                             struct ifla_vf_info *vi)
1626 {
1627         struct be_adapter *adapter = netdev_priv(netdev);
1628         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1629
1630         if (!sriov_enabled(adapter))
1631                 return -EPERM;
1632
1633         if (vf >= adapter->num_vfs)
1634                 return -EINVAL;
1635
1636         vi->vf = vf;
1637         vi->max_tx_rate = vf_cfg->tx_rate;
1638         vi->min_tx_rate = 0;
1639         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1640         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1641         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1642         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1643         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1644
1645         return 0;
1646 }
1647
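/* Enable Transparent VLAN Tagging (TVT) on a VF: program the hsw config with
 * the given VLAN, clear any guest-programmed VLAN filters and revoke the VF's
 * FILTMGMT privilege so it can no longer program its own filters.
 */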
1648 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1649 {
1650         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1651         u16 vids[BE_NUM_VLANS_SUPPORTED];
1652         int vf_if_id = vf_cfg->if_handle;
1653         int status;
1654
1655         /* Enable Transparent VLAN Tagging */
1656         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1657         if (status)
1658                 return status;
1659
1660         /* Clear any pre-programmed VLAN filters on the VF now that TVT is enabled */
1661         vids[0] = 0;
1662         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1663         if (!status)
1664                 dev_info(&adapter->pdev->dev,
1665                          "Cleared guest VLANs on VF%d", vf);
1666
1667         /* After TVT is enabled, disallow VFs from programming VLAN filters */
1668         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1669                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1670                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1671                 if (!status)
1672                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1673         }
1674         return 0;
1675 }
1676
1677 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1678 {
1679         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1680         struct device *dev = &adapter->pdev->dev;
1681         int status;
1682
1683         /* Reset Transparent VLAN Tagging. */
1684         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1685                                        vf_cfg->if_handle, 0, 0);
1686         if (status)
1687                 return status;
1688
1689         /* Allow VFs to program VLAN filtering */
1690         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1691                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1692                                                   BE_PRIV_FILTMGMT, vf + 1);
1693                 if (!status) {
1694                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1695                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1696                 }
1697         }
1698
1699         dev_info(dev,
1700                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1701         return 0;
1702 }
1703
1704 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1705 {
1706         struct be_adapter *adapter = netdev_priv(netdev);
1707         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1708         int status;
1709
1710         if (!sriov_enabled(adapter))
1711                 return -EPERM;
1712
1713         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1714                 return -EINVAL;
1715
1716         if (vlan || qos) {
1717                 vlan |= qos << VLAN_PRIO_SHIFT;
1718                 status = be_set_vf_tvt(adapter, vf, vlan);
1719         } else {
1720                 status = be_clear_vf_tvt(adapter, vf);
1721         }
1722
1723         if (status) {
1724                 dev_err(&adapter->pdev->dev,
1725                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1726                         status);
1727                 return be_cmd_status(status);
1728         }
1729
1730         vf_cfg->vlan_tag = vlan;
1731         return 0;
1732 }
1733
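/* ndo_set_vf_rate handler: only max_tx_rate is supported. The rate must lie
 * between 100 Mbps and the current link speed, and on Skyhawk it must also be
 * a multiple of 1% of the link speed, since QOS is programmed as a percentage.
 */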
1734 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1735                              int min_tx_rate, int max_tx_rate)
1736 {
1737         struct be_adapter *adapter = netdev_priv(netdev);
1738         struct device *dev = &adapter->pdev->dev;
1739         int percent_rate, status = 0;
1740         u16 link_speed = 0;
1741         u8 link_status;
1742
1743         if (!sriov_enabled(adapter))
1744                 return -EPERM;
1745
1746         if (vf >= adapter->num_vfs)
1747                 return -EINVAL;
1748
1749         if (min_tx_rate)
1750                 return -EINVAL;
1751
1752         if (!max_tx_rate)
1753                 goto config_qos;
1754
1755         status = be_cmd_link_status_query(adapter, &link_speed,
1756                                           &link_status, 0);
1757         if (status)
1758                 goto err;
1759
1760         if (!link_status) {
1761                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1762                 status = -ENETDOWN;
1763                 goto err;
1764         }
1765
1766         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1767                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1768                         link_speed);
1769                 status = -EINVAL;
1770                 goto err;
1771         }
1772
1773         /* On Skyhawk the QOS setting must be done only as a % value */
1774         percent_rate = link_speed / 100;
1775         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1776                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1777                         percent_rate);
1778                 status = -EINVAL;
1779                 goto err;
1780         }
1781
1782 config_qos:
1783         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1784         if (status)
1785                 goto err;
1786
1787         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1788         return 0;
1789
1790 err:
1791         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1792                 max_tx_rate, vf);
1793         return be_cmd_status(status);
1794 }
1795
1796 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1797                                 int link_state)
1798 {
1799         struct be_adapter *adapter = netdev_priv(netdev);
1800         int status;
1801
1802         if (!sriov_enabled(adapter))
1803                 return -EPERM;
1804
1805         if (vf >= adapter->num_vfs)
1806                 return -EINVAL;
1807
1808         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1809         if (status) {
1810                 dev_err(&adapter->pdev->dev,
1811                         "Link state change on VF %d failed: %#x\n", vf, status);
1812                 return be_cmd_status(status);
1813         }
1814
1815         adapter->vf_cfg[vf].plink_tracking = link_state;
1816
1817         return 0;
1818 }
1819
1820 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
1821 {
1822         struct be_adapter *adapter = netdev_priv(netdev);
1823         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1824         u8 spoofchk;
1825         int status;
1826
1827         if (!sriov_enabled(adapter))
1828                 return -EPERM;
1829
1830         if (vf >= adapter->num_vfs)
1831                 return -EINVAL;
1832
1833         if (BEx_chip(adapter))
1834                 return -EOPNOTSUPP;
1835
1836         if (enable == vf_cfg->spoofchk)
1837                 return 0;
1838
1839         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
1840
1841         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
1842                                        0, spoofchk);
1843         if (status) {
1844                 dev_err(&adapter->pdev->dev,
1845                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
1846                 return be_cmd_status(status);
1847         }
1848
1849         vf_cfg->spoofchk = enable;
1850         return 0;
1851 }
1852
1853 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
1854                           ulong now)
1855 {
1856         aic->rx_pkts_prev = rx_pkts;
1857         aic->tx_reqs_prev = tx_pkts;
1858         aic->jiffies = now;
1859 }
1860
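/* Compute a new EQ delay value for adaptive interrupt coalescing (AIC).
 * The delay is derived from the combined RX+TX packet rate observed since
 * the last update: eqd = (pps / 15000) << 2, forced to 0 below a small
 * threshold and clamped to [min_eqd, max_eqd].
 */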
1861 static int be_get_new_eqd(struct be_eq_obj *eqo)
1862 {
1863         struct be_adapter *adapter = eqo->adapter;
1864         int eqd, start;
1865         struct be_aic_obj *aic;
1866         struct be_rx_obj *rxo;
1867         struct be_tx_obj *txo;
1868         u64 rx_pkts = 0, tx_pkts = 0;
1869         ulong now;
1870         u32 pps, delta;
1871         int i;
1872
1873         aic = &adapter->aic_obj[eqo->idx];
1874         if (!aic->enable) {
1875                 if (aic->jiffies)
1876                         aic->jiffies = 0;
1877                 eqd = aic->et_eqd;
1878                 return eqd;
1879         }
1880
1881         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
1882                 do {
1883                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
1884                         rx_pkts += rxo->stats.rx_pkts;
1885                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
1886         }
1887
1888         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
1889                 do {
1890                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
1891                         tx_pkts += txo->stats.tx_reqs;
1892                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
1893         }
1894
1895         /* Skip if the counters wrapped around or this is the first calculation */
1896         now = jiffies;
1897         if (!aic->jiffies || time_before(now, aic->jiffies) ||
1898             rx_pkts < aic->rx_pkts_prev ||
1899             tx_pkts < aic->tx_reqs_prev) {
1900                 be_aic_update(aic, rx_pkts, tx_pkts, now);
1901                 return aic->prev_eqd;
1902         }
1903
1904         delta = jiffies_to_msecs(now - aic->jiffies);
1905         if (delta == 0)
1906                 return aic->prev_eqd;
1907
1908         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
1909                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
1910         eqd = (pps / 15000) << 2;
1911
1912         if (eqd < 8)
1913                 eqd = 0;
1914         eqd = min_t(u32, eqd, aic->max_eqd);
1915         eqd = max_t(u32, eqd, aic->min_eqd);
1916
1917         be_aic_update(aic, rx_pkts, tx_pkts, now);
1918
1919         return eqd;
1920 }
1921
1922 /* For Skyhawk-R only */
1923 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
1924 {
1925         struct be_adapter *adapter = eqo->adapter;
1926         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
1927         ulong now = jiffies;
1928         int eqd;
1929         u32 mult_enc;
1930
1931         if (!aic->enable)
1932                 return 0;
1933
1934         if (jiffies_to_msecs(now - aic->jiffies) < 1)
1935                 eqd = aic->prev_eqd;
1936         else
1937                 eqd = be_get_new_eqd(eqo);
1938
1939         if (eqd > 100)
1940                 mult_enc = R2I_DLY_ENC_1;
1941         else if (eqd > 60)
1942                 mult_enc = R2I_DLY_ENC_2;
1943         else if (eqd > 20)
1944                 mult_enc = R2I_DLY_ENC_3;
1945         else
1946                 mult_enc = R2I_DLY_ENC_0;
1947
1948         aic->prev_eqd = eqd;
1949
1950         return mult_enc;
1951 }
1952
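/* Recompute the EQ delay for every event queue and issue a MODIFY_EQ_DELAY
 * command for the queues whose delay changed (or for all of them when
 * force_update is set). The delay multiplier sent to FW is eqd * 65 / 100.
 */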
1953 void be_eqd_update(struct be_adapter *adapter, bool force_update)
1954 {
1955         struct be_set_eqd set_eqd[MAX_EVT_QS];
1956         struct be_aic_obj *aic;
1957         struct be_eq_obj *eqo;
1958         int i, num = 0, eqd;
1959
1960         for_all_evt_queues(adapter, eqo, i) {
1961                 aic = &adapter->aic_obj[eqo->idx];
1962                 eqd = be_get_new_eqd(eqo);
1963                 if (force_update || eqd != aic->prev_eqd) {
1964                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
1965                         set_eqd[num].eq_id = eqo->q.id;
1966                         aic->prev_eqd = eqd;
1967                         num++;
1968                 }
1969         }
1970
1971         if (num)
1972                 be_cmd_modify_eqd(adapter, set_eqd, num);
1973 }
1974
1975 static void be_rx_stats_update(struct be_rx_obj *rxo,
1976                                struct be_rx_compl_info *rxcp)
1977 {
1978         struct be_rx_stats *stats = rx_stats(rxo);
1979
1980         u64_stats_update_begin(&stats->sync);
1981         stats->rx_compl++;
1982         stats->rx_bytes += rxcp->pkt_size;
1983         stats->rx_pkts++;
1984         if (rxcp->tunneled)
1985                 stats->rx_vxlan_offload_pkts++;
1986         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
1987                 stats->rx_mcast_pkts++;
1988         if (rxcp->err)
1989                 stats->rx_compl_err++;
1990         u64_stats_update_end(&stats->sync);
1991 }
1992
1993 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
1994 {
1995         /* L4 checksum is not reliable for non-TCP/UDP packets.
1996          * Also ignore ipcksm for IPv6 pkts.
1997          */
1998         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
1999                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2000 }
2001
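/* Pop the page_info entry at the RXQ tail. The full page is unmapped when
 * this fragment is the last one carved from it; otherwise only this fragment
 * is synced for CPU access.
 */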
2002 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2003 {
2004         struct be_adapter *adapter = rxo->adapter;
2005         struct be_rx_page_info *rx_page_info;
2006         struct be_queue_info *rxq = &rxo->q;
2007         u32 frag_idx = rxq->tail;
2008
2009         rx_page_info = &rxo->page_info_tbl[frag_idx];
2010         BUG_ON(!rx_page_info->page);
2011
2012         if (rx_page_info->last_frag) {
2013                 dma_unmap_page(&adapter->pdev->dev,
2014                                dma_unmap_addr(rx_page_info, bus),
2015                                adapter->big_page_size, DMA_FROM_DEVICE);
2016                 rx_page_info->last_frag = false;
2017         } else {
2018                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2019                                         dma_unmap_addr(rx_page_info, bus),
2020                                         rx_frag_size, DMA_FROM_DEVICE);
2021         }
2022
2023         queue_tail_inc(rxq);
2024         atomic_dec(&rxq->used);
2025         return rx_page_info;
2026 }
2027
2028 /* Throw away the data in the Rx completion */
2029 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2030                                 struct be_rx_compl_info *rxcp)
2031 {
2032         struct be_rx_page_info *page_info;
2033         u16 i, num_rcvd = rxcp->num_rcvd;
2034
2035         for (i = 0; i < num_rcvd; i++) {
2036                 page_info = get_rx_page_info(rxo);
2037                 put_page(page_info->page);
2038                 memset(page_info, 0, sizeof(*page_info));
2039         }
2040 }
2041
2042 /*
2043  * skb_fill_rx_data forms a complete skb for an ether frame
2044  * indicated by rxcp.
2045  */
2046 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2047                              struct be_rx_compl_info *rxcp)
2048 {
2049         struct be_rx_page_info *page_info;
2050         u16 i, j;
2051         u16 hdr_len, curr_frag_len, remaining;
2052         u8 *start;
2053
2054         page_info = get_rx_page_info(rxo);
2055         start = page_address(page_info->page) + page_info->page_offset;
2056         prefetch(start);
2057
2058         /* Copy data in the first descriptor of this completion */
2059         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2060
2061         skb->len = curr_frag_len;
2062         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2063                 memcpy(skb->data, start, curr_frag_len);
2064                 /* Complete packet has now been moved to data */
2065                 put_page(page_info->page);
2066                 skb->data_len = 0;
2067                 skb->tail += curr_frag_len;
2068         } else {
2069                 hdr_len = ETH_HLEN;
2070                 memcpy(skb->data, start, hdr_len);
2071                 skb_shinfo(skb)->nr_frags = 1;
2072                 skb_frag_set_page(skb, 0, page_info->page);
2073                 skb_shinfo(skb)->frags[0].page_offset =
2074                                         page_info->page_offset + hdr_len;
2075                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2076                                   curr_frag_len - hdr_len);
2077                 skb->data_len = curr_frag_len - hdr_len;
2078                 skb->truesize += rx_frag_size;
2079                 skb->tail += hdr_len;
2080         }
2081         page_info->page = NULL;
2082
2083         if (rxcp->pkt_size <= rx_frag_size) {
2084                 BUG_ON(rxcp->num_rcvd != 1);
2085                 return;
2086         }
2087
2088         /* More frags present for this completion */
2089         remaining = rxcp->pkt_size - curr_frag_len;
2090         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2091                 page_info = get_rx_page_info(rxo);
2092                 curr_frag_len = min(remaining, rx_frag_size);
2093
2094                 /* Coalesce all frags from the same physical page in one slot */
2095                 if (page_info->page_offset == 0) {
2096                         /* Fresh page */
2097                         j++;
2098                         skb_frag_set_page(skb, j, page_info->page);
2099                         skb_shinfo(skb)->frags[j].page_offset =
2100                                                         page_info->page_offset;
2101                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2102                         skb_shinfo(skb)->nr_frags++;
2103                 } else {
2104                         put_page(page_info->page);
2105                 }
2106
2107                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2108                 skb->len += curr_frag_len;
2109                 skb->data_len += curr_frag_len;
2110                 skb->truesize += rx_frag_size;
2111                 remaining -= curr_frag_len;
2112                 page_info->page = NULL;
2113         }
2114         BUG_ON(j > MAX_SKB_FRAGS);
2115 }
2116
2117 /* Process the RX completion indicated by rxcp when GRO is disabled */
2118 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2119                                 struct be_rx_compl_info *rxcp)
2120 {
2121         struct be_adapter *adapter = rxo->adapter;
2122         struct net_device *netdev = adapter->netdev;
2123         struct sk_buff *skb;
2124
2125         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2126         if (unlikely(!skb)) {
2127                 rx_stats(rxo)->rx_drops_no_skbs++;
2128                 be_rx_compl_discard(rxo, rxcp);
2129                 return;
2130         }
2131
2132         skb_fill_rx_data(rxo, skb, rxcp);
2133
2134         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2135                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2136         else
2137                 skb_checksum_none_assert(skb);
2138
2139         skb->protocol = eth_type_trans(skb, netdev);
2140         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2141         if (netdev->features & NETIF_F_RXHASH)
2142                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2143
2144         skb->csum_level = rxcp->tunneled;
2145         skb_mark_napi_id(skb, napi);
2146
2147         if (rxcp->vlanf)
2148                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2149
2150         netif_receive_skb(skb);
2151 }
2152
2153 /* Process the RX completion indicated by rxcp when GRO is enabled */
2154 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2155                                     struct napi_struct *napi,
2156                                     struct be_rx_compl_info *rxcp)
2157 {
2158         struct be_adapter *adapter = rxo->adapter;
2159         struct be_rx_page_info *page_info;
2160         struct sk_buff *skb = NULL;
2161         u16 remaining, curr_frag_len;
2162         u16 i, j;
2163
2164         skb = napi_get_frags(napi);
2165         if (!skb) {
2166                 be_rx_compl_discard(rxo, rxcp);
2167                 return;
2168         }
2169
2170         remaining = rxcp->pkt_size;
2171         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2172                 page_info = get_rx_page_info(rxo);
2173
2174                 curr_frag_len = min(remaining, rx_frag_size);
2175
2176                 /* Coalesce all frags from the same physical page in one slot */
2177                 if (i == 0 || page_info->page_offset == 0) {
2178                         /* First frag or Fresh page */
2179                         j++;
2180                         skb_frag_set_page(skb, j, page_info->page);
2181                         skb_shinfo(skb)->frags[j].page_offset =
2182                                                         page_info->page_offset;
2183                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2184                 } else {
2185                         put_page(page_info->page);
2186                 }
2187                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2188                 skb->truesize += rx_frag_size;
2189                 remaining -= curr_frag_len;
2190                 memset(page_info, 0, sizeof(*page_info));
2191         }
2192         BUG_ON(j > MAX_SKB_FRAGS);
2193
2194         skb_shinfo(skb)->nr_frags = j + 1;
2195         skb->len = rxcp->pkt_size;
2196         skb->data_len = rxcp->pkt_size;
2197         skb->ip_summed = CHECKSUM_UNNECESSARY;
2198         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2199         if (adapter->netdev->features & NETIF_F_RXHASH)
2200                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2201
2202         skb->csum_level = rxcp->tunneled;
2203
2204         if (rxcp->vlanf)
2205                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2206
2207         napi_gro_frags(napi);
2208 }
2209
2210 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2211                                  struct be_rx_compl_info *rxcp)
2212 {
2213         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2214         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2215         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2216         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2217         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2218         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2219         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2220         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2221         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2222         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2223         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2224         if (rxcp->vlanf) {
2225                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2226                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2227         }
2228         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2229         rxcp->tunneled =
2230                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2231 }
2232
2233 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2234                                  struct be_rx_compl_info *rxcp)
2235 {
2236         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2237         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2238         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2239         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2240         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2241         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2242         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2243         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2244         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2245         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2246         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2247         if (rxcp->vlanf) {
2248                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2249                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2250         }
2251         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2252         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2253 }
2254
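/* Return the RX completion at the CQ tail, or NULL if it is not yet valid.
 * The completion is parsed into rxo->rxcp using the v1 layout when the
 * adapter is in BE3-native mode and the v0 layout otherwise, and the valid
 * bit is cleared so the entry is not processed twice.
 */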
2255 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2256 {
2257         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2258         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2259         struct be_adapter *adapter = rxo->adapter;
2260
2261         /* For checking the valid bit it is Ok to use either definition as the
2262          * valid bit is at the same position in both v0 and v1 Rx compl */
2263         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2264                 return NULL;
2265
2266         rmb();
2267         be_dws_le_to_cpu(compl, sizeof(*compl));
2268
2269         if (adapter->be3_native)
2270                 be_parse_rx_compl_v1(compl, rxcp);
2271         else
2272                 be_parse_rx_compl_v0(compl, rxcp);
2273
2274         if (rxcp->ip_frag)
2275                 rxcp->l4_csum = 0;
2276
2277         if (rxcp->vlanf) {
2278                 /* In QNQ modes, if qnq bit is not set, then the packet was
2279                  * tagged only with the transparent outer vlan-tag and must
2280                  * not be treated as a vlan packet by host
2281                  */
2282                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2283                         rxcp->vlanf = 0;
2284
2285                 if (!lancer_chip(adapter))
2286                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2287
2288                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2289                     !test_bit(rxcp->vlan_tag, adapter->vids))
2290                         rxcp->vlanf = 0;
2291         }
2292
2293         /* As the compl has been parsed, reset it; we won't touch it again */
2294         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2295
2296         queue_tail_inc(&rxo->cq);
2297         return rxcp;
2298 }
2299
2300 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2301 {
2302         u32 order = get_order(size);
2303
2304         if (order > 0)
2305                 gfp |= __GFP_COMP;
2306         return  alloc_pages(gfp, order);
2307 }
2308
2309 /*
2310  * Allocate a page, split it into fragments of size rx_frag_size and post as
2311  * receive buffers to BE
2312  */
2313 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2314 {
2315         struct be_adapter *adapter = rxo->adapter;
2316         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2317         struct be_queue_info *rxq = &rxo->q;
2318         struct page *pagep = NULL;
2319         struct device *dev = &adapter->pdev->dev;
2320         struct be_eth_rx_d *rxd;
2321         u64 page_dmaaddr = 0, frag_dmaaddr;
2322         u32 posted, page_offset = 0, notify = 0;
2323
2324         page_info = &rxo->page_info_tbl[rxq->head];
2325         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2326                 if (!pagep) {
2327                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2328                         if (unlikely(!pagep)) {
2329                                 rx_stats(rxo)->rx_post_fail++;
2330                                 break;
2331                         }
2332                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2333                                                     adapter->big_page_size,
2334                                                     DMA_FROM_DEVICE);
2335                         if (dma_mapping_error(dev, page_dmaaddr)) {
2336                                 put_page(pagep);
2337                                 pagep = NULL;
2338                                 adapter->drv_stats.dma_map_errors++;
2339                                 break;
2340                         }
2341                         page_offset = 0;
2342                 } else {
2343                         get_page(pagep);
2344                         page_offset += rx_frag_size;
2345                 }
2346                 page_info->page_offset = page_offset;
2347                 page_info->page = pagep;
2348
2349                 rxd = queue_head_node(rxq);
2350                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2351                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2352                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2353
2354                 /* Any space left in the current big page for another frag? */
2355                 if ((page_offset + rx_frag_size + rx_frag_size) >
2356                                         adapter->big_page_size) {
2357                         pagep = NULL;
2358                         page_info->last_frag = true;
2359                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2360                 } else {
2361                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2362                 }
2363
2364                 prev_page_info = page_info;
2365                 queue_head_inc(rxq);
2366                 page_info = &rxo->page_info_tbl[rxq->head];
2367         }
2368
2369         /* Mark the last frag of a page when we break out of the above loop
2370          * with no more slots available in the RXQ
2371          */
2372         if (pagep) {
2373                 prev_page_info->last_frag = true;
2374                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2375         }
2376
2377         if (posted) {
2378                 atomic_add(posted, &rxq->used);
2379                 if (rxo->rx_post_starved)
2380                         rxo->rx_post_starved = false;
2381                 do {
2382                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2383                         be_rxq_notify(adapter, rxq->id, notify);
2384                         posted -= notify;
2385                 } while (posted);
2386         } else if (atomic_read(&rxq->used) == 0) {
2387                 /* Let be_worker replenish when memory is available */
2388                 rxo->rx_post_starved = true;
2389         }
2390 }
2391
2392 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2393 {
2394         struct be_queue_info *tx_cq = &txo->cq;
2395         struct be_tx_compl_info *txcp = &txo->txcp;
2396         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2397
2398         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2399                 return NULL;
2400
2401         /* Ensure load ordering of valid bit dword and other dwords below */
2402         rmb();
2403         be_dws_le_to_cpu(compl, sizeof(*compl));
2404
2405         txcp->status = GET_TX_COMPL_BITS(status, compl);
2406         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2407
2408         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2409         queue_tail_inc(tx_cq);
2410         return txcp;
2411 }
2412
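/* Walk the TXQ from its tail up to last_index, unmapping each WRB and freeing
 * the corresponding skbs. Returns the number of WRBs processed so the caller
 * can decrement the queue's used count.
 */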
2413 static u16 be_tx_compl_process(struct be_adapter *adapter,
2414                                struct be_tx_obj *txo, u16 last_index)
2415 {
2416         struct sk_buff **sent_skbs = txo->sent_skb_list;
2417         struct be_queue_info *txq = &txo->q;
2418         struct sk_buff *skb = NULL;
2419         bool unmap_skb_hdr = false;
2420         struct be_eth_wrb *wrb;
2421         u16 num_wrbs = 0;
2422         u32 frag_index;
2423
2424         do {
2425                 if (sent_skbs[txq->tail]) {
2426                         /* Free skb from prev req */
2427                         if (skb)
2428                                 dev_consume_skb_any(skb);
2429                         skb = sent_skbs[txq->tail];
2430                         sent_skbs[txq->tail] = NULL;
2431                         queue_tail_inc(txq);  /* skip hdr wrb */
2432                         num_wrbs++;
2433                         unmap_skb_hdr = true;
2434                 }
2435                 wrb = queue_tail_node(txq);
2436                 frag_index = txq->tail;
2437                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2438                               (unmap_skb_hdr && skb_headlen(skb)));
2439                 unmap_skb_hdr = false;
2440                 queue_tail_inc(txq);
2441                 num_wrbs++;
2442         } while (frag_index != last_index);
2443         dev_consume_skb_any(skb);
2444
2445         return num_wrbs;
2446 }
2447
2448 /* Return the number of events in the event queue */
2449 static inline int events_get(struct be_eq_obj *eqo)
2450 {
2451         struct be_eq_entry *eqe;
2452         int num = 0;
2453
2454         do {
2455                 eqe = queue_tail_node(&eqo->q);
2456                 if (eqe->evt == 0)
2457                         break;
2458
2459                 rmb();
2460                 eqe->evt = 0;
2461                 num++;
2462                 queue_tail_inc(&eqo->q);
2463         } while (true);
2464
2465         return num;
2466 }
2467
2468 /* Leaves the EQ in disarmed state */
2469 static void be_eq_clean(struct be_eq_obj *eqo)
2470 {
2471         int num = events_get(eqo);
2472
2473         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2474 }
2475
2476 /* Free posted rx buffers that were not used */
2477 static void be_rxq_clean(struct be_rx_obj *rxo)
2478 {
2479         struct be_queue_info *rxq = &rxo->q;
2480         struct be_rx_page_info *page_info;
2481
2482         while (atomic_read(&rxq->used) > 0) {
2483                 page_info = get_rx_page_info(rxo);
2484                 put_page(page_info->page);
2485                 memset(page_info, 0, sizeof(*page_info));
2486         }
2487         BUG_ON(atomic_read(&rxq->used));
2488         rxq->tail = 0;
2489         rxq->head = 0;
2490 }
2491
2492 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2493 {
2494         struct be_queue_info *rx_cq = &rxo->cq;
2495         struct be_rx_compl_info *rxcp;
2496         struct be_adapter *adapter = rxo->adapter;
2497         int flush_wait = 0;
2498
2499         /* Consume pending rx completions.
2500          * Wait for the flush completion (identified by zero num_rcvd)
2501          * to arrive. Notify CQ even when there are no more CQ entries
2502          * for HW to flush partially coalesced CQ entries.
2503          * In Lancer, there is no need to wait for flush compl.
2504          */
2505         for (;;) {
2506                 rxcp = be_rx_compl_get(rxo);
2507                 if (!rxcp) {
2508                         if (lancer_chip(adapter))
2509                                 break;
2510
2511                         if (flush_wait++ > 50 ||
2512                             be_check_error(adapter,
2513                                            BE_ERROR_HW)) {
2514                                 dev_warn(&adapter->pdev->dev,
2515                                          "did not receive flush compl\n");
2516                                 break;
2517                         }
2518                         be_cq_notify(adapter, rx_cq->id, true, 0);
2519                         mdelay(1);
2520                 } else {
2521                         be_rx_compl_discard(rxo, rxcp);
2522                         be_cq_notify(adapter, rx_cq->id, false, 1);
2523                         if (rxcp->num_rcvd == 0)
2524                                 break;
2525                 }
2526         }
2527
2528         /* After cleanup, leave the CQ in unarmed state */
2529         be_cq_notify(adapter, rx_cq->id, false, 0);
2530 }
2531
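/* Drain TX completions during teardown: poll each TXQ until the HW has been
 * silent for ~10ms, then reclaim any requests that were enqueued but never
 * notified to the HW and reset the queue indices.
 */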
2532 static void be_tx_compl_clean(struct be_adapter *adapter)
2533 {
2534         struct device *dev = &adapter->pdev->dev;
2535         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2536         struct be_tx_compl_info *txcp;
2537         struct be_queue_info *txq;
2538         u32 end_idx, notified_idx;
2539         struct be_tx_obj *txo;
2540         int i, pending_txqs;
2541
2542         /* Stop polling for compls when HW has been silent for 10ms */
2543         do {
2544                 pending_txqs = adapter->num_tx_qs;
2545
2546                 for_all_tx_queues(adapter, txo, i) {
2547                         cmpl = 0;
2548                         num_wrbs = 0;
2549                         txq = &txo->q;
2550                         while ((txcp = be_tx_compl_get(txo))) {
2551                                 num_wrbs +=
2552                                         be_tx_compl_process(adapter, txo,
2553                                                             txcp->end_index);
2554                                 cmpl++;
2555                         }
2556                         if (cmpl) {
2557                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2558                                 atomic_sub(num_wrbs, &txq->used);
2559                                 timeo = 0;
2560                         }
2561                         if (!be_is_tx_compl_pending(txo))
2562                                 pending_txqs--;
2563                 }
2564
2565                 if (pending_txqs == 0 || ++timeo > 10 ||
2566                     be_check_error(adapter, BE_ERROR_HW))
2567                         break;
2568
2569                 mdelay(1);
2570         } while (true);
2571
2572         /* Free enqueued TX that was never notified to HW */
2573         for_all_tx_queues(adapter, txo, i) {
2574                 txq = &txo->q;
2575
2576                 if (atomic_read(&txq->used)) {
2577                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2578                                  i, atomic_read(&txq->used));
2579                         notified_idx = txq->tail;
2580                         end_idx = txq->tail;
2581                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2582                                   txq->len);
2583                         /* Use the tx-compl process logic to handle requests
2584                          * that were not sent to the HW.
2585                          */
2586                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2587                         atomic_sub(num_wrbs, &txq->used);
2588                         BUG_ON(atomic_read(&txq->used));
2589                         txo->pend_wrb_cnt = 0;
2590                         /* Since hw was never notified of these requests,
2591                          * reset TXQ indices
2592                          */
2593                         txq->head = notified_idx;
2594                         txq->tail = notified_idx;
2595                 }
2596         }
2597 }
2598
2599 static void be_evt_queues_destroy(struct be_adapter *adapter)
2600 {
2601         struct be_eq_obj *eqo;
2602         int i;
2603
2604         for_all_evt_queues(adapter, eqo, i) {
2605                 if (eqo->q.created) {
2606                         be_eq_clean(eqo);
2607                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2608                         napi_hash_del(&eqo->napi);
2609                         netif_napi_del(&eqo->napi);
2610                         free_cpumask_var(eqo->affinity_mask);
2611                 }
2612                 be_queue_free(adapter, &eqo->q);
2613         }
2614 }
2615
2616 static int be_evt_queues_create(struct be_adapter *adapter)
2617 {
2618         struct be_queue_info *eq;
2619         struct be_eq_obj *eqo;
2620         struct be_aic_obj *aic;
2621         int i, rc;
2622
2623         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2624                                     adapter->cfg_num_qs);
2625
2626         for_all_evt_queues(adapter, eqo, i) {
2627                 int numa_node = dev_to_node(&adapter->pdev->dev);
2628
2629                 aic = &adapter->aic_obj[i];
2630                 eqo->adapter = adapter;
2631                 eqo->idx = i;
2632                 aic->max_eqd = BE_MAX_EQD;
2633                 aic->enable = true;
2634
2635                 eq = &eqo->q;
2636                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2637                                     sizeof(struct be_eq_entry));
2638                 if (rc)
2639                         return rc;
2640
2641                 rc = be_cmd_eq_create(adapter, eqo);
2642                 if (rc)
2643                         return rc;
2644
2645                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2646                         return -ENOMEM;
2647                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2648                                 eqo->affinity_mask);
2649                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2650                                BE_NAPI_WEIGHT);
2651         }
2652         return 0;
2653 }
2654
2655 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2656 {
2657         struct be_queue_info *q;
2658
2659         q = &adapter->mcc_obj.q;
2660         if (q->created)
2661                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2662         be_queue_free(adapter, q);
2663
2664         q = &adapter->mcc_obj.cq;
2665         if (q->created)
2666                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2667         be_queue_free(adapter, q);
2668 }
2669
2670 /* Must be called only after TX qs are created as MCC shares TX EQ */
2671 static int be_mcc_queues_create(struct be_adapter *adapter)
2672 {
2673         struct be_queue_info *q, *cq;
2674
2675         cq = &adapter->mcc_obj.cq;
2676         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2677                            sizeof(struct be_mcc_compl)))
2678                 goto err;
2679
2680         /* Use the default EQ for MCC completions */
2681         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2682                 goto mcc_cq_free;
2683
2684         q = &adapter->mcc_obj.q;
2685         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2686                 goto mcc_cq_destroy;
2687
2688         if (be_cmd_mccq_create(adapter, q, cq))
2689                 goto mcc_q_free;
2690
2691         return 0;
2692
2693 mcc_q_free:
2694         be_queue_free(adapter, q);
2695 mcc_cq_destroy:
2696         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2697 mcc_cq_free:
2698         be_queue_free(adapter, cq);
2699 err:
2700         return -1;
2701 }
2702
2703 static void be_tx_queues_destroy(struct be_adapter *adapter)
2704 {
2705         struct be_queue_info *q;
2706         struct be_tx_obj *txo;
2707         u8 i;
2708
2709         for_all_tx_queues(adapter, txo, i) {
2710                 q = &txo->q;
2711                 if (q->created)
2712                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2713                 be_queue_free(adapter, q);
2714
2715                 q = &txo->cq;
2716                 if (q->created)
2717                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2718                 be_queue_free(adapter, q);
2719         }
2720 }
2721
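/* Create the TX queues and their completion queues. The number of TXQs is
 * capped by the number of event queues; when there are fewer EQs than TXQs,
 * several TXQs share one EQ. XPS for each TXQ is configured from its EQ's
 * CPU affinity mask.
 */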
2722 static int be_tx_qs_create(struct be_adapter *adapter)
2723 {
2724         struct be_queue_info *cq;
2725         struct be_tx_obj *txo;
2726         struct be_eq_obj *eqo;
2727         int status, i;
2728
2729         adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter));
2730
2731         for_all_tx_queues(adapter, txo, i) {
2732                 cq = &txo->cq;
2733                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2734                                         sizeof(struct be_eth_tx_compl));
2735                 if (status)
2736                         return status;
2737
2738                 u64_stats_init(&txo->stats.sync);
2739                 u64_stats_init(&txo->stats.sync_compl);
2740
2741                 /* If num_evt_qs is less than num_tx_qs, then more than
2742                  * one txq shares an eq
2743                  */
2744                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2745                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2746                 if (status)
2747                         return status;
2748
2749                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2750                                         sizeof(struct be_eth_wrb));
2751                 if (status)
2752                         return status;
2753
2754                 status = be_cmd_txq_create(adapter, txo);
2755                 if (status)
2756                         return status;
2757
2758                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2759                                     eqo->idx);
2760         }
2761
2762         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2763                  adapter->num_tx_qs);
2764         return 0;
2765 }
2766
2767 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2768 {
2769         struct be_queue_info *q;
2770         struct be_rx_obj *rxo;
2771         int i;
2772
2773         for_all_rx_queues(adapter, rxo, i) {
2774                 q = &rxo->cq;
2775                 if (q->created)
2776                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2777                 be_queue_free(adapter, q);
2778         }
2779 }
2780
2781 static int be_rx_cqs_create(struct be_adapter *adapter)
2782 {
2783         struct be_queue_info *eq, *cq;
2784         struct be_rx_obj *rxo;
2785         int rc, i;
2786
2787         /* We can create as many RSS rings as there are EQs. */
2788         adapter->num_rss_qs = adapter->num_evt_qs;
2789
2790         /* We'll use RSS only if at least 2 RSS rings are supported. */
2791         if (adapter->num_rss_qs <= 1)
2792                 adapter->num_rss_qs = 0;
2793
2794         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2795
2796         /* When the interface is not capable of RSS rings (and there is no
2797          * need to create a default RXQ) we'll still need one RXQ
2798          */
2799         if (adapter->num_rx_qs == 0)
2800                 adapter->num_rx_qs = 1;
2801
2802         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2803         for_all_rx_queues(adapter, rxo, i) {
2804                 rxo->adapter = adapter;
2805                 cq = &rxo->cq;
2806                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2807                                     sizeof(struct be_eth_rx_compl));
2808                 if (rc)
2809                         return rc;
2810
2811                 u64_stats_init(&rxo->stats.sync);
2812                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
2813                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
2814                 if (rc)
2815                         return rc;
2816         }
2817
2818         dev_info(&adapter->pdev->dev,
2819                  "created %d RX queue(s)\n", adapter->num_rx_qs);
2820         return 0;
2821 }
2822
2823 static irqreturn_t be_intx(int irq, void *dev)
2824 {
2825         struct be_eq_obj *eqo = dev;
2826         struct be_adapter *adapter = eqo->adapter;
2827         int num_evts = 0;
2828
2829         /* IRQ is not expected when NAPI is scheduled as the EQ
2830          * will not be armed.
2831          * But, this can happen on Lancer INTx where it takes
2832          * a while to de-assert INTx or in BE2 where occasionally
2833          * an interrupt may be raised even when EQ is unarmed.
2834          * If NAPI is already scheduled, then counting & notifying
2835          * events will orphan them.
2836          */
2837         if (napi_schedule_prep(&eqo->napi)) {
2838                 num_evts = events_get(eqo);
2839                 __napi_schedule(&eqo->napi);
2840                 if (num_evts)
2841                         eqo->spurious_intr = 0;
2842         }
2843         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
2844
2845         /* Return IRQ_HANDLED only for the first spurious intr
2846          * after a valid intr to stop the kernel from branding
2847          * this irq as a bad one!
2848          */
2849         if (num_evts || eqo->spurious_intr++ == 0)
2850                 return IRQ_HANDLED;
2851         else
2852                 return IRQ_NONE;
2853 }
2854
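/* MSI-X handler: each EQ has its own vector. Notify the EQ without
 * re-arming it and let the NAPI poll routine process the events.
 */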
2855 static irqreturn_t be_msix(int irq, void *dev)
2856 {
2857         struct be_eq_obj *eqo = dev;
2858
2859         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
2860         napi_schedule(&eqo->napi);
2861         return IRQ_HANDLED;
2862 }
2863
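/* GRO is used only for error-free TCP completions whose L4 checksum
 * was validated by HW.
 */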
2864 static inline bool do_gro(struct be_rx_compl_info *rxcp)
2865 {
2866         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
2867 }
2868
2869 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
2870                          int budget, int polling)
2871 {
2872         struct be_adapter *adapter = rxo->adapter;
2873         struct be_queue_info *rx_cq = &rxo->cq;
2874         struct be_rx_compl_info *rxcp;
2875         u32 work_done;
2876         u32 frags_consumed = 0;
2877
2878         for (work_done = 0; work_done < budget; work_done++) {
2879                 rxcp = be_rx_compl_get(rxo);
2880                 if (!rxcp)
2881                         break;
2882
2883                 /* Is it a flush compl that has no data */
2884                 if (unlikely(rxcp->num_rcvd == 0))
2885                         goto loop_continue;
2886
2887                 /* Discard a compl with partial DMA on Lancer B0 */
2888                 if (unlikely(!rxcp->pkt_size)) {
2889                         be_rx_compl_discard(rxo, rxcp);
2890                         goto loop_continue;
2891                 }
2892
2893                 /* On BE drop pkts that arrive due to imperfect filtering in
2894                  * promiscuous mode on some SKUs
2895                  */
2896                 if (unlikely(rxcp->port != adapter->port_num &&
2897                              !lancer_chip(adapter))) {
2898                         be_rx_compl_discard(rxo, rxcp);
2899                         goto loop_continue;
2900                 }
2901
2902                 /* Don't do gro when we're busy_polling */
2903                 if (do_gro(rxcp) && polling != BUSY_POLLING)
2904                         be_rx_compl_process_gro(rxo, napi, rxcp);
2905                 else
2906                         be_rx_compl_process(rxo, napi, rxcp);
2907
2908 loop_continue:
2909                 frags_consumed += rxcp->num_rcvd;
2910                 be_rx_stats_update(rxo, rxcp);
2911         }
2912
2913         if (work_done) {
2914                 be_cq_notify(adapter, rx_cq->id, true, work_done);
2915
2916                 /* When an rx-obj gets into post_starved state, just
2917                  * let be_worker do the posting.
2918                  */
2919                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
2920                     !rxo->rx_post_starved)
2921                         be_post_rx_frags(rxo, GFP_ATOMIC,
2922                                          max_t(u32, MAX_RX_POST,
2923                                                frags_consumed));
2924         }
2925
2926         return work_done;
2927 }
2928
2929 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2930 {
2931         switch (status) {
2932         case BE_TX_COMP_HDR_PARSE_ERR:
2933                 tx_stats(txo)->tx_hdr_parse_err++;
2934                 break;
2935         case BE_TX_COMP_NDMA_ERR:
2936                 tx_stats(txo)->tx_dma_err++;
2937                 break;
2938         case BE_TX_COMP_ACL_ERR:
2939                 tx_stats(txo)->tx_spoof_check_err++;
2940                 break;
2941         }
2942 }
2943
2944 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2945 {
2946         switch (status) {
2947         case LANCER_TX_COMP_LSO_ERR:
2948                 tx_stats(txo)->tx_tso_err++;
2949                 break;
2950         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2951         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2952                 tx_stats(txo)->tx_spoof_check_err++;
2953                 break;
2954         case LANCER_TX_COMP_QINQ_ERR:
2955                 tx_stats(txo)->tx_qinq_err++;
2956                 break;
2957         case LANCER_TX_COMP_PARITY_ERR:
2958                 tx_stats(txo)->tx_internal_parity_err++;
2959                 break;
2960         case LANCER_TX_COMP_DMA_ERR:
2961                 tx_stats(txo)->tx_dma_err++;
2962                 break;
2963         }
2964 }
2965
2966 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
2967                           int idx)
2968 {
2969         int num_wrbs = 0, work_done = 0;
2970         struct be_tx_compl_info *txcp;
2971
2972         while ((txcp = be_tx_compl_get(txo))) {
2973                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
2974                 work_done++;
2975
2976                 if (txcp->status) {
2977                         if (lancer_chip(adapter))
2978                                 lancer_update_tx_err(txo, txcp->status);
2979                         else
2980                                 be_update_tx_err(txo, txcp->status);
2981                 }
2982         }
2983
2984         if (work_done) {
2985                 be_cq_notify(adapter, txo->cq.id, true, work_done);
2986                 atomic_sub(num_wrbs, &txo->q.used);
2987
2988                 /* As Tx wrbs have been freed up, wake up netdev queue
2989                  * if it was stopped due to lack of tx wrbs.  */
2990                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
2991                     be_can_txq_wake(txo)) {
2992                         netif_wake_subqueue(adapter->netdev, idx);
2993                 }
2994
2995                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
2996                 tx_stats(txo)->tx_compl += work_done;
2997                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
2998         }
2999 }
3000
3001 #ifdef CONFIG_NET_RX_BUSY_POLL
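/* eqo->state tracks who currently owns the RX queues of an EQ: the NAPI
 * poll path or the busy-poll path. Whichever path finds the lock already
 * taken records a *_YIELD flag and backs off instead of spinning.
 */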
3002 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3003 {
3004         bool status = true;
3005
3006         spin_lock(&eqo->lock); /* BH is already disabled */
3007         if (eqo->state & BE_EQ_LOCKED) {
3008                 WARN_ON(eqo->state & BE_EQ_NAPI);
3009                 eqo->state |= BE_EQ_NAPI_YIELD;
3010                 status = false;
3011         } else {
3012                 eqo->state = BE_EQ_NAPI;
3013         }
3014         spin_unlock(&eqo->lock);
3015         return status;
3016 }
3017
3018 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3019 {
3020         spin_lock(&eqo->lock); /* BH is already disabled */
3021
3022         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3023         eqo->state = BE_EQ_IDLE;
3024
3025         spin_unlock(&eqo->lock);
3026 }
3027
3028 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3029 {
3030         bool status = true;
3031
3032         spin_lock_bh(&eqo->lock);
3033         if (eqo->state & BE_EQ_LOCKED) {
3034                 eqo->state |= BE_EQ_POLL_YIELD;
3035                 status = false;
3036         } else {
3037                 eqo->state |= BE_EQ_POLL;
3038         }
3039         spin_unlock_bh(&eqo->lock);
3040         return status;
3041 }
3042
3043 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3044 {
3045         spin_lock_bh(&eqo->lock);
3046
3047         WARN_ON(eqo->state & (BE_EQ_NAPI));
3048         eqo->state = BE_EQ_IDLE;
3049
3050         spin_unlock_bh(&eqo->lock);
3051 }
3052
3053 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3054 {
3055         spin_lock_init(&eqo->lock);
3056         eqo->state = BE_EQ_IDLE;
3057 }
3058
3059 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3060 {
3061         local_bh_disable();
3062
3063         /* It's enough to just acquire the napi lock on the eqo to stop
3064          * be_busy_poll() from processing any queues.
3065          */
3066         while (!be_lock_napi(eqo))
3067                 mdelay(1);
3068
3069         local_bh_enable();
3070 }
3071
3072 #else /* CONFIG_NET_RX_BUSY_POLL */
3073
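/* Busy polling compiled out: NAPI "locking" always succeeds and the
 * busy-poll lock is never granted, so only the NAPI path runs.
 */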
3074 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3075 {
3076         return true;
3077 }
3078
3079 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3080 {
3081 }
3082
3083 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3084 {
3085         return false;
3086 }
3087
3088 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3089 {
3090 }
3091
3092 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3093 {
3094 }
3095
3096 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3097 {
3098 }
3099 #endif /* CONFIG_NET_RX_BUSY_POLL */
3100
3101 int be_poll(struct napi_struct *napi, int budget)
3102 {
3103         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3104         struct be_adapter *adapter = eqo->adapter;
3105         int max_work = 0, work, i, num_evts;
3106         struct be_rx_obj *rxo;
3107         struct be_tx_obj *txo;
3108         u32 mult_enc = 0;
3109
3110         num_evts = events_get(eqo);
3111
3112         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3113                 be_process_tx(adapter, txo, i);
3114
3115         if (be_lock_napi(eqo)) {
3116                 /* This loop will iterate twice for EQ0 in which
3117                  * completions of the last RXQ (default one) are also processed.
3118                  * For other EQs the loop iterates only once
3119                  */
3120                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3121                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3122                         max_work = max(work, max_work);
3123                 }
3124                 be_unlock_napi(eqo);
3125         } else {
3126                 max_work = budget;
3127         }
3128
3129         if (is_mcc_eqo(eqo))
3130                 be_process_mcc(adapter);
3131
3132         if (max_work < budget) {
3133                 napi_complete(napi);
3134
3135                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3136                  * delay via a delay multiplier encoding value
3137                  */
3138                 if (skyhawk_chip(adapter))
3139                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3140
3141                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3142                              mult_enc);
3143         } else {
3144                 /* As we'll continue in polling mode, count and clear events */
3145                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3146         }
3147         return max_work;
3148 }
3149
3150 #ifdef CONFIG_NET_RX_BUSY_POLL
3151 static int be_busy_poll(struct napi_struct *napi)
3152 {
3153         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3154         struct be_adapter *adapter = eqo->adapter;
3155         struct be_rx_obj *rxo;
3156         int i, work = 0;
3157
3158         if (!be_lock_busy_poll(eqo))
3159                 return LL_FLUSH_BUSY;
3160
3161         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3162                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3163                 if (work)
3164                         break;
3165         }
3166
3167         be_unlock_busy_poll(eqo);
3168         return work;
3169 }
3170 #endif
3171
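/* Check the adapter for unrecoverable errors: the SLIPORT status register
 * on Lancer, or the UE status CSRs on BE2/BE3/Skyhawk.
 */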
3172 void be_detect_error(struct be_adapter *adapter)
3173 {
3174         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3175         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3176         u32 i;
3177         struct device *dev = &adapter->pdev->dev;
3178
3179         if (be_check_error(adapter, BE_ERROR_HW))
3180                 return;
3181
3182         if (lancer_chip(adapter)) {
3183                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3184                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3185                         be_set_error(adapter, BE_ERROR_UE);
3186                         sliport_err1 = ioread32(adapter->db +
3187                                                 SLIPORT_ERROR1_OFFSET);
3188                         sliport_err2 = ioread32(adapter->db +
3189                                                 SLIPORT_ERROR2_OFFSET);
3190                         /* Do not log error messages if it's a FW reset */
3191                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3192                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3193                                 dev_info(dev, "Firmware update in progress\n");
3194                         } else {
3195                                 dev_err(dev, "Error detected in the card\n");
3196                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3197                                         sliport_status);
3198                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3199                                         sliport_err1);
3200                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3201                                         sliport_err2);
3202                         }
3203                 }
3204         } else {
3205                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3206                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3207                 ue_lo_mask = ioread32(adapter->pcicfg +
3208                                       PCICFG_UE_STATUS_LOW_MASK);
3209                 ue_hi_mask = ioread32(adapter->pcicfg +
3210                                       PCICFG_UE_STATUS_HI_MASK);
3211
3212                 ue_lo = (ue_lo & ~ue_lo_mask);
3213                 ue_hi = (ue_hi & ~ue_hi_mask);
3214
3215                 /* On certain platforms BE hardware can indicate spurious UEs.
3216                  * A real UE will cause the HW to stop working on its own, so
3217                  * the hw_error flag is not set merely on UE detection.
3218                  */
3219
3220                 if (ue_lo || ue_hi) {
3221                         dev_err(dev,
3222                                 "Unrecoverable Error detected in the adapter\n");
3223                         dev_err(dev, "Please reboot server to recover\n");
3224                         if (skyhawk_chip(adapter))
3225                                 be_set_error(adapter, BE_ERROR_UE);
3226
3227                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3228                                 if (ue_lo & 1)
3229                                         dev_err(dev, "UE: %s bit set\n",
3230                                                 ue_status_low_desc[i]);
3231                         }
3232                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3233                                 if (ue_hi & 1)
3234                                         dev_err(dev, "UE: %s bit set\n",
3235                                                 ue_status_hi_desc[i]);
3236                         }
3237                 }
3238         }
3239 }
3240
3241 static void be_msix_disable(struct be_adapter *adapter)
3242 {
3243         if (msix_enabled(adapter)) {
3244                 pci_disable_msix(adapter->pdev);
3245                 adapter->num_msix_vec = 0;
3246                 adapter->num_msix_roce_vec = 0;
3247         }
3248 }
3249
3250 static int be_msix_enable(struct be_adapter *adapter)
3251 {
3252         int i, num_vec;
3253         struct device *dev = &adapter->pdev->dev;
3254
3255         /* If RoCE is supported, program the max number of NIC vectors that
3256          * may be configured via set-channels, along with vectors needed for
3257          * RoCE. Else, just program the number we'll use initially.
3258          */
3259         if (be_roce_supported(adapter))
3260                 num_vec = min_t(int, 2 * be_max_eqs(adapter),
3261                                 2 * num_online_cpus());
3262         else
3263                 num_vec = adapter->cfg_num_qs;
3264
3265         for (i = 0; i < num_vec; i++)
3266                 adapter->msix_entries[i].entry = i;
3267
3268         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3269                                         MIN_MSIX_VECTORS, num_vec);
3270         if (num_vec < 0)
3271                 goto fail;
3272
3273         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3274                 adapter->num_msix_roce_vec = num_vec / 2;
3275                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3276                          adapter->num_msix_roce_vec);
3277         }
3278
3279         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3280
3281         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3282                  adapter->num_msix_vec);
3283         return 0;
3284
3285 fail:
3286         dev_warn(dev, "MSIx enable failed\n");
3287
3288         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3289         if (be_virtfn(adapter))
3290                 return num_vec;
3291         return 0;
3292 }
3293
3294 static inline int be_msix_vec_get(struct be_adapter *adapter,
3295                                   struct be_eq_obj *eqo)
3296 {
3297         return adapter->msix_entries[eqo->msix_idx].vector;
3298 }
3299
3300 static int be_msix_register(struct be_adapter *adapter)
3301 {
3302         struct net_device *netdev = adapter->netdev;
3303         struct be_eq_obj *eqo;
3304         int status, i, vec;
3305
3306         for_all_evt_queues(adapter, eqo, i) {
3307                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3308                 vec = be_msix_vec_get(adapter, eqo);
3309                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3310                 if (status)
3311                         goto err_msix;
3312
3313                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3314         }
3315
3316         return 0;
3317 err_msix:
3318         for (i--; i >= 0; i--) {
3319                 eqo = &adapter->eq_obj[i];
3320                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3321         }
3322         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3323                  status);
3324         be_msix_disable(adapter);
3325         return status;
3326 }
3327
3328 static int be_irq_register(struct be_adapter *adapter)
3329 {
3330         struct net_device *netdev = adapter->netdev;
3331         int status;
3332
3333         if (msix_enabled(adapter)) {
3334                 status = be_msix_register(adapter);
3335                 if (status == 0)
3336                         goto done;
3337                 /* INTx is not supported for VF */
3338                 if (be_virtfn(adapter))
3339                         return status;
3340         }
3341
3342         /* INTx: only the first EQ is used */
3343         netdev->irq = adapter->pdev->irq;
3344         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3345                              &adapter->eq_obj[0]);
3346         if (status) {
3347                 dev_err(&adapter->pdev->dev,
3348                         "INTx request IRQ failed - err %d\n", status);
3349                 return status;
3350         }
3351 done:
3352         adapter->isr_registered = true;
3353         return 0;
3354 }
3355
3356 static void be_irq_unregister(struct be_adapter *adapter)
3357 {
3358         struct net_device *netdev = adapter->netdev;
3359         struct be_eq_obj *eqo;
3360         int i, vec;
3361
3362         if (!adapter->isr_registered)
3363                 return;
3364
3365         /* INTx */
3366         if (!msix_enabled(adapter)) {
3367                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3368                 goto done;
3369         }
3370
3371         /* MSIx */
3372         for_all_evt_queues(adapter, eqo, i) {
3373                 vec = be_msix_vec_get(adapter, eqo);
3374                 irq_set_affinity_hint(vec, NULL);
3375                 free_irq(vec, eqo);
3376         }
3377
3378 done:
3379         adapter->isr_registered = false;
3380 }
3381
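/* Destroy all RX queues, flush their completion queues and turn off RSS. */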
3382 static void be_rx_qs_destroy(struct be_adapter *adapter)
3383 {
3384         struct rss_info *rss = &adapter->rss_info;
3385         struct be_queue_info *q;
3386         struct be_rx_obj *rxo;
3387         int i;
3388
3389         for_all_rx_queues(adapter, rxo, i) {
3390                 q = &rxo->q;
3391                 if (q->created) {
3392                         /* If RXQs are destroyed while in an "out of buffer"
3393                          * state, there is a possibility of an HW stall on
3394                          * Lancer. So, post 64 buffers to each queue to relieve
3395                          * the "out of buffer" condition.
3396                          * Make sure there's space in the RXQ before posting.
3397                          */
3398                         if (lancer_chip(adapter)) {
3399                                 be_rx_cq_clean(rxo);
3400                                 if (atomic_read(&q->used) == 0)
3401                                         be_post_rx_frags(rxo, GFP_KERNEL,
3402                                                          MAX_RX_POST);
3403                         }
3404
3405                         be_cmd_rxq_destroy(adapter, q);
3406                         be_rx_cq_clean(rxo);
3407                         be_rxq_clean(rxo);
3408                 }
3409                 be_queue_free(adapter, q);
3410         }
3411
3412         if (rss->rss_flags) {
3413                 rss->rss_flags = RSS_ENABLE_NONE;
3414                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3415                                   128, rss->rss_hkey);
3416         }
3417 }
3418
3419 static void be_disable_if_filters(struct be_adapter *adapter)
3420 {
3421         be_cmd_pmac_del(adapter, adapter->if_handle,
3422                         adapter->pmac_id[0], 0);
3423
3424         be_clear_uc_list(adapter);
3425
3426         /* The IFACE flags are enabled in the open path and cleared
3427          * in the close path. When a VF gets detached from the host and
3428          * assigned to a VM the following happens:
3429          *      - VF's IFACE flags get cleared in the detach path
3430          *      - IFACE create is issued by the VF in the attach path
3431          * Due to a bug in the BE3/Skyhawk-R FW
3432          * (Lancer FW doesn't have the bug), the IFACE capability flags
3433          * specified along with the IFACE create cmd issued by a VF are not
3434          * honoured by FW.  As a consequence, if a *new* driver
3435          * (that enables/disables IFACE flags in open/close)
3436          * is loaded in the host and an *old* driver is used by a VM/VF,
3437          * the IFACE gets created *without* the needed flags.
3438          * To avoid this, disable RX-filter flags only for Lancer.
3439          */
3440         if (lancer_chip(adapter)) {
3441                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3442                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3443         }
3444 }
3445
3446 static int be_close(struct net_device *netdev)
3447 {
3448         struct be_adapter *adapter = netdev_priv(netdev);
3449         struct be_eq_obj *eqo;
3450         int i;
3451
3452         /* This protection is needed as be_close() may be called even when the
3453          * adapter is in cleared state (after eeh perm failure)
3454          */
3455         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3456                 return 0;
3457
3458         be_disable_if_filters(adapter);
3459
3460         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3461                 for_all_evt_queues(adapter, eqo, i) {
3462                         napi_disable(&eqo->napi);
3463                         be_disable_busy_poll(eqo);
3464                 }
3465                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3466         }
3467
3468         be_async_mcc_disable(adapter);
3469
3470         /* Wait for all pending tx completions to arrive so that
3471          * all tx skbs are freed.
3472          */
3473         netif_tx_disable(netdev);
3474         be_tx_compl_clean(adapter);
3475
3476         be_rx_qs_destroy(adapter);
3477
3478         for_all_evt_queues(adapter, eqo, i) {
3479                 if (msix_enabled(adapter))
3480                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3481                 else
3482                         synchronize_irq(netdev->irq);
3483                 be_eq_clean(eqo);
3484         }
3485
3486         be_irq_unregister(adapter);
3487
3488         return 0;
3489 }
3490
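/* Create the RX rings (the default RXQ, if needed, plus the RSS rings),
 * program the RSS indirection table and hash key when more than one ring
 * exists, and post an initial batch of receive buffers.
 */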
3491 static int be_rx_qs_create(struct be_adapter *adapter)
3492 {
3493         struct rss_info *rss = &adapter->rss_info;
3494         u8 rss_key[RSS_HASH_KEY_LEN];
3495         struct be_rx_obj *rxo;
3496         int rc, i, j;
3497
3498         for_all_rx_queues(adapter, rxo, i) {
3499                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3500                                     sizeof(struct be_eth_rx_d));
3501                 if (rc)
3502                         return rc;
3503         }
3504
3505         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3506                 rxo = default_rxo(adapter);
3507                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3508                                        rx_frag_size, adapter->if_handle,
3509                                        false, &rxo->rss_id);
3510                 if (rc)
3511                         return rc;
3512         }
3513
3514         for_all_rss_queues(adapter, rxo, i) {
3515                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3516                                        rx_frag_size, adapter->if_handle,
3517                                        true, &rxo->rss_id);
3518                 if (rc)
3519                         return rc;
3520         }
3521
3522         if (be_multi_rxq(adapter)) {
3523                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3524                         for_all_rss_queues(adapter, rxo, i) {
3525                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3526                                         break;
3527                                 rss->rsstable[j + i] = rxo->rss_id;
3528                                 rss->rss_queue[j + i] = i;
3529                         }
3530                 }
3531                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3532                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3533
3534                 if (!BEx_chip(adapter))
3535                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3536                                 RSS_ENABLE_UDP_IPV6;
3537
3538                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3539                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3540                                        RSS_INDIR_TABLE_LEN, rss_key);
3541                 if (rc) {
3542                         rss->rss_flags = RSS_ENABLE_NONE;
3543                         return rc;
3544                 }
3545
3546                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3547         } else {
3548                 /* Disable RSS, if only default RX Q is created */
3549                 rss->rss_flags = RSS_ENABLE_NONE;
3550         }
3551
3552
3553         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3554          * which is a queue empty condition
3555          */
3556         for_all_rx_queues(adapter, rxo, i)
3557                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3558
3559         return 0;
3560 }
3561
3562 static int be_enable_if_filters(struct be_adapter *adapter)
3563 {
3564         int status;
3565
3566         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3567         if (status)
3568                 return status;
3569
3570         /* For BE3 VFs, the PF programs the initial MAC address */
3571         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3572                 status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
3573                                          adapter->if_handle,
3574                                          &adapter->pmac_id[0], 0);
3575                 if (status)
3576                         return status;
3577         }
3578
3579         if (adapter->vlans_added)
3580                 be_vid_config(adapter);
3581
3582         be_set_rx_mode(adapter->netdev);
3583
3584         return 0;
3585 }
3586
3587 static int be_open(struct net_device *netdev)
3588 {
3589         struct be_adapter *adapter = netdev_priv(netdev);
3590         struct be_eq_obj *eqo;
3591         struct be_rx_obj *rxo;
3592         struct be_tx_obj *txo;
3593         u8 link_status;
3594         int status, i;
3595
3596         status = be_rx_qs_create(adapter);
3597         if (status)
3598                 goto err;
3599
3600         status = be_enable_if_filters(adapter);
3601         if (status)
3602                 goto err;
3603
3604         status = be_irq_register(adapter);
3605         if (status)
3606                 goto err;
3607
3608         for_all_rx_queues(adapter, rxo, i)
3609                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3610
3611         for_all_tx_queues(adapter, txo, i)
3612                 be_cq_notify(adapter, txo->cq.id, true, 0);
3613
3614         be_async_mcc_enable(adapter);
3615
3616         for_all_evt_queues(adapter, eqo, i) {
3617                 napi_enable(&eqo->napi);
3618                 be_enable_busy_poll(eqo);
3619                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3620         }
3621         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3622
3623         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3624         if (!status)
3625                 be_link_status_update(adapter, link_status);
3626
3627         netif_tx_start_all_queues(netdev);
3628 #ifdef CONFIG_BE2NET_VXLAN
3629         if (skyhawk_chip(adapter))
3630                 vxlan_get_rx_port(netdev);
3631 #endif
3632
3633         return 0;
3634 err:
3635         be_close(adapter->netdev);
3636         return -EIO;
3637 }
3638
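/* Enable or disable magic-packet Wake-on-LAN via the FW and set the PCI
 * wake capability for the D3hot/D3cold states accordingly.
 */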
3639 static int be_setup_wol(struct be_adapter *adapter, bool enable)
3640 {
3641         struct device *dev = &adapter->pdev->dev;
3642         struct be_dma_mem cmd;
3643         u8 mac[ETH_ALEN];
3644         int status;
3645
3646         eth_zero_addr(mac);
3647
3648         cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config);
3649         cmd.va = dma_zalloc_coherent(dev, cmd.size, &cmd.dma, GFP_KERNEL);
3650         if (!cmd.va)
3651                 return -ENOMEM;
3652
3653         if (enable) {
3654                 status = pci_write_config_dword(adapter->pdev,
3655                                                 PCICFG_PM_CONTROL_OFFSET,
3656                                                 PCICFG_PM_CONTROL_MASK);
3657                 if (status) {
3658                         dev_err(dev, "Could not enable Wake-on-lan\n");
3659                         goto err;
3660                 }
3661         } else {
3662                 ether_addr_copy(mac, adapter->netdev->dev_addr);
3663         }
3664
3665         status = be_cmd_enable_magic_wol(adapter, mac, &cmd);
3666         pci_enable_wake(adapter->pdev, PCI_D3hot, enable);
3667         pci_enable_wake(adapter->pdev, PCI_D3cold, enable);
3668 err:
3669         dma_free_coherent(dev, cmd.size, cmd.va, cmd.dma);
3670         return status;
3671 }
3672
3673 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3674 {
3675         u32 addr;
3676
3677         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3678
3679         mac[5] = (u8)(addr & 0xFF);
3680         mac[4] = (u8)((addr >> 8) & 0xFF);
3681         mac[3] = (u8)((addr >> 16) & 0xFF);
3682         /* Use the OUI from the current MAC address */
3683         memcpy(mac, adapter->netdev->dev_addr, 3);
3684 }
3685
3686 /*
3687  * Generate a seed MAC address from the PF MAC Address using jhash.
3688  * MAC addresses for VFs are assigned incrementally starting from the seed.
3689  * These addresses are programmed in the ASIC by the PF and the VF driver
3690  * queries for the MAC address during its probe.
3691  */
3692 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3693 {
3694         u32 vf;
3695         int status = 0;
3696         u8 mac[ETH_ALEN];
3697         struct be_vf_cfg *vf_cfg;
3698
3699         be_vf_eth_addr_generate(adapter, mac);
3700
3701         for_all_vfs(adapter, vf_cfg, vf) {
3702                 if (BEx_chip(adapter))
3703                         status = be_cmd_pmac_add(adapter, mac,
3704                                                  vf_cfg->if_handle,
3705                                                  &vf_cfg->pmac_id, vf + 1);
3706                 else
3707                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3708                                                 vf + 1);
3709
3710                 if (status)
3711                         dev_err(&adapter->pdev->dev,
3712                                 "Mac address assignment failed for VF %d\n",
3713                                 vf);
3714                 else
3715                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3716
3717                 mac[5] += 1;
3718         }
3719         return status;
3720 }
3721
3722 static int be_vfs_mac_query(struct be_adapter *adapter)
3723 {
3724         int status, vf;
3725         u8 mac[ETH_ALEN];
3726         struct be_vf_cfg *vf_cfg;
3727
3728         for_all_vfs(adapter, vf_cfg, vf) {
3729                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3730                                                mac, vf_cfg->if_handle,
3731                                                false, vf+1);
3732                 if (status)
3733                         return status;
3734                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3735         }
3736         return 0;
3737 }
3738
3739 static void be_vf_clear(struct be_adapter *adapter)
3740 {
3741         struct be_vf_cfg *vf_cfg;
3742         u32 vf;
3743
3744         if (pci_vfs_assigned(adapter->pdev)) {
3745                 dev_warn(&adapter->pdev->dev,
3746                          "VFs are assigned to VMs: not disabling VFs\n");
3747                 goto done;
3748         }
3749
3750         pci_disable_sriov(adapter->pdev);
3751
3752         for_all_vfs(adapter, vf_cfg, vf) {
3753                 if (BEx_chip(adapter))
3754                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3755                                         vf_cfg->pmac_id, vf + 1);
3756                 else
3757                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3758                                        vf + 1);
3759
3760                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3761         }
3762 done:
3763         kfree(adapter->vf_cfg);
3764         adapter->num_vfs = 0;
3765         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3766 }
3767
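/* Destroy queues in the reverse order of creation: MCC, RX CQs, TX, then EQs. */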
3768 static void be_clear_queues(struct be_adapter *adapter)
3769 {
3770         be_mcc_queues_destroy(adapter);
3771         be_rx_cqs_destroy(adapter);
3772         be_tx_queues_destroy(adapter);
3773         be_evt_queues_destroy(adapter);
3774 }
3775
3776 static void be_cancel_worker(struct be_adapter *adapter)
3777 {
3778         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3779                 cancel_delayed_work_sync(&adapter->work);
3780                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3781         }
3782 }
3783
3784 static void be_cancel_err_detection(struct be_adapter *adapter)
3785 {
3786         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3787                 cancel_delayed_work_sync(&adapter->be_err_detection_work);
3788                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3789         }
3790 }
3791
3792 #ifdef CONFIG_BE2NET_VXLAN
3793 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3794 {
3795         struct net_device *netdev = adapter->netdev;
3796
3797         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3798                 be_cmd_manage_iface(adapter, adapter->if_handle,
3799                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3800
3801         if (adapter->vxlan_port)
3802                 be_cmd_set_vxlan_port(adapter, 0);
3803
3804         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3805         adapter->vxlan_port = 0;
3806
3807         netdev->hw_enc_features = 0;
3808         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3809         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3810 }
3811 #endif
3812
3813 static u16 be_calculate_vf_qs(struct be_adapter *adapter, u16 num_vfs)
3814 {
3815         struct be_resources res = adapter->pool_res;
3816         u16 num_vf_qs = 1;
3817
3818         /* Distribute the queue resources among the PF and its VFs.
3819          * Do not distribute queue resources in multi-channel configuration.
3820          */
3821         if (num_vfs && !be_is_mc(adapter)) {
3822                  /* Divide the qpairs evenly among the VFs and the PF, capped
3823                   * at VF-EQ-count. Any remainder qpairs belong to the PF.
3824                   */
3825                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3826                                 res.max_rss_qs / (num_vfs + 1));
3827
3828                 /* Skyhawk-R chip supports only MAX_RSS_IFACES RSS capable
3829                  * interfaces per port. Provide RSS on VFs only if the number
3830                  * of VFs requested is less than the MAX_RSS_IFACES limit.
3831                  */
3832                 if (num_vfs >= MAX_RSS_IFACES)
3833                         num_vf_qs = 1;
3834         }
3835         return num_vf_qs;
3836 }
3837
3838 static int be_clear(struct be_adapter *adapter)
3839 {
3840         struct pci_dev *pdev = adapter->pdev;
3841         u16 num_vf_qs;
3842
3843         be_cancel_worker(adapter);
3844
3845         if (sriov_enabled(adapter))
3846                 be_vf_clear(adapter);
3847
3848         /* Re-configure FW to distribute resources evenly across max-supported
3849          * number of VFs, only when VFs are not already enabled.
3850          */
3851         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
3852             !pci_vfs_assigned(pdev)) {
3853                 num_vf_qs = be_calculate_vf_qs(adapter,
3854                                                pci_sriov_get_totalvfs(pdev));
3855                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
3856                                         pci_sriov_get_totalvfs(pdev),
3857                                         num_vf_qs);
3858         }
3859
3860 #ifdef CONFIG_BE2NET_VXLAN
3861         be_disable_vxlan_offloads(adapter);
3862 #endif
3863         kfree(adapter->pmac_id);
3864         adapter->pmac_id = NULL;
3865
3866         be_cmd_if_destroy(adapter, adapter->if_handle, 0);
3867
3868         be_clear_queues(adapter);
3869
3870         be_msix_disable(adapter);
3871         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
3872         return 0;
3873 }
3874
3875 static int be_vfs_if_create(struct be_adapter *adapter)
3876 {
3877         struct be_resources res = {0};
3878         u32 cap_flags, en_flags, vf;
3879         struct be_vf_cfg *vf_cfg;
3880         int status;
3881
3882         /* If a FW profile exists, then cap_flags are updated */
3883         cap_flags = BE_VF_IF_EN_FLAGS;
3884
3885         for_all_vfs(adapter, vf_cfg, vf) {
3886                 if (!BE3_chip(adapter)) {
3887                         status = be_cmd_get_profile_config(adapter, &res,
3888                                                            RESOURCE_LIMITS,
3889                                                            vf + 1);
3890                         if (!status) {
3891                                 cap_flags = res.if_cap_flags;
3892                                 /* Prevent VFs from enabling VLAN promiscuous
3893                                  * mode
3894                                  */
3895                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3896                         }
3897                 }
3898
3899                 /* PF should enable IF flags during proxy if_create call */
3900                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
3901                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
3902                                           &vf_cfg->if_handle, vf + 1);
3903                 if (status)
3904                         return status;
3905         }
3906
3907         return 0;
3908 }
3909
3910 static int be_vf_setup_init(struct be_adapter *adapter)
3911 {
3912         struct be_vf_cfg *vf_cfg;
3913         int vf;
3914
3915         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
3916                                   GFP_KERNEL);
3917         if (!adapter->vf_cfg)
3918                 return -ENOMEM;
3919
3920         for_all_vfs(adapter, vf_cfg, vf) {
3921                 vf_cfg->if_handle = -1;
3922                 vf_cfg->pmac_id = -1;
3923         }
3924         return 0;
3925 }
3926
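/* SR-IOV setup: when VFs are already enabled (e.g. after a driver reload)
 * re-use their existing IFACEs and MAC addresses; otherwise create an IFACE
 * and assign a MAC per VF. Then grant filtering privileges, configure QoS
 * and link state, and finally enable SR-IOV on the PCI device.
 */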
3927 static int be_vf_setup(struct be_adapter *adapter)
3928 {
3929         struct device *dev = &adapter->pdev->dev;
3930         struct be_vf_cfg *vf_cfg;
3931         int status, old_vfs, vf;
3932         bool spoofchk;
3933
3934         old_vfs = pci_num_vf(adapter->pdev);
3935
3936         status = be_vf_setup_init(adapter);
3937         if (status)
3938                 goto err;
3939
3940         if (old_vfs) {
3941                 for_all_vfs(adapter, vf_cfg, vf) {
3942                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
3943                         if (status)
3944                                 goto err;
3945                 }
3946
3947                 status = be_vfs_mac_query(adapter);
3948                 if (status)
3949                         goto err;
3950         } else {
3951                 status = be_vfs_if_create(adapter);
3952                 if (status)
3953                         goto err;
3954
3955                 status = be_vf_eth_addr_config(adapter);
3956                 if (status)
3957                         goto err;
3958         }
3959
3960         for_all_vfs(adapter, vf_cfg, vf) {
3961                 /* Allow VFs to program MAC/VLAN filters */
3962                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
3963                                                   vf + 1);
3964                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
3965                         status = be_cmd_set_fn_privileges(adapter,
3966                                                           vf_cfg->privileges |
3967                                                           BE_PRIV_FILTMGMT,
3968                                                           vf + 1);
3969                         if (!status) {
3970                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
3971                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
3972                                          vf);
3973                         }
3974                 }
3975
3976                 /* Allow full available bandwidth */
3977                 if (!old_vfs)
3978                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
3979
3980                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
3981                                                vf_cfg->if_handle, NULL,
3982                                                &spoofchk);
3983                 if (!status)
3984                         vf_cfg->spoofchk = spoofchk;
3985
3986                 if (!old_vfs) {
3987                         be_cmd_enable_vf(adapter, vf + 1);
3988                         be_cmd_set_logical_link_config(adapter,
3989                                                        IFLA_VF_LINK_STATE_AUTO,
3990                                                        vf+1);
3991                 }
3992         }
3993
3994         if (!old_vfs) {
3995                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
3996                 if (status) {
3997                         dev_err(dev, "SRIOV enable failed\n");
3998                         adapter->num_vfs = 0;
3999                         goto err;
4000                 }
4001         }
4002
4003         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4004         return 0;
4005 err:
4006         dev_err(dev, "VF setup failed\n");
4007         be_vf_clear(adapter);
4008         return status;
4009 }
4010
4011 /* Converting function_mode bits on BE3 to SH mc_type enums */
4012
4013 static u8 be_convert_mc_type(u32 function_mode)
4014 {
4015         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4016                 return vNIC1;
4017         else if (function_mode & QNQ_MODE)
4018                 return FLEX10;
4019         else if (function_mode & VNIC_MODE)
4020                 return vNIC2;
4021         else if (function_mode & UMC_ENABLED)
4022                 return UMC;
4023         else
4024                 return MC_NONE;
4025 }
4026
4027 /* On BE2/BE3 FW does not suggest the supported limits */
4028 static void BEx_get_resources(struct be_adapter *adapter,
4029                               struct be_resources *res)
4030 {
4031         bool use_sriov = adapter->num_vfs ? 1 : 0;
4032
4033         if (be_physfn(adapter))
4034                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4035         else
4036                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4037
4038         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4039
4040         if (be_is_mc(adapter)) {
4041                 /* Assuming that there are 4 channels per port,
4042                  * when multi-channel is enabled
4043                  */
4044                 if (be_is_qnq_mode(adapter))
4045                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4046                 else
4047                         /* In a non-qnq multichannel mode, the pvid
4048                          * takes up one vlan entry
4049                          */
4050                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4051         } else {
4052                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4053         }
4054
4055         res->max_mcast_mac = BE_MAX_MC;
4056
4057         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4058          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4059          *    *only* if it is RSS-capable.
4060          */
4061         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4062             be_virtfn(adapter) ||
4063             (be_is_mc(adapter) &&
4064              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4065                 res->max_tx_qs = 1;
4066         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4067                 struct be_resources super_nic_res = {0};
4068
4069                 /* On a SuperNIC profile, the driver needs to use the
4070                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4071                  */
4072                 be_cmd_get_profile_config(adapter, &super_nic_res,
4073                                           RESOURCE_LIMITS, 0);
4074                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4075                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4076         } else {
4077                 res->max_tx_qs = BE3_MAX_TX_QS;
4078         }
4079
4080         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4081             !use_sriov && be_physfn(adapter))
4082                 res->max_rss_qs = (adapter->be3_native) ?
4083                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4084         res->max_rx_qs = res->max_rss_qs + 1;
4085
4086         if (be_physfn(adapter))
4087                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4088                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4089         else
4090                 res->max_evt_qs = 1;
4091
4092         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4093         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4094         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4095                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4096 }
4097
4098 static void be_setup_init(struct be_adapter *adapter)
4099 {
4100         adapter->vlan_prio_bmap = 0xff;
4101         adapter->phy.link_speed = -1;
4102         adapter->if_handle = -1;
4103         adapter->be3_native = false;
4104         adapter->if_flags = 0;
4105         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4106         if (be_physfn(adapter))
4107                 adapter->cmd_privileges = MAX_PRIVILEGES;
4108         else
4109                 adapter->cmd_privileges = MIN_PRIVILEGES;
4110 }
4111
4112 static int be_get_sriov_config(struct be_adapter *adapter)
4113 {
4114         struct be_resources res = {0};
4115         int max_vfs, old_vfs;
4116
4117         be_cmd_get_profile_config(adapter, &res, RESOURCE_LIMITS, 0);
4118
4119         /* Some old versions of BE3 FW don't report max_vfs value */
4120         if (BE3_chip(adapter) && !res.max_vfs) {
4121                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4122                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4123         }
4124
4125         adapter->pool_res = res;
4126
4127         /* If during previous unload of the driver, the VFs were not disabled,
4128          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4129          * Instead use the TotalVFs value stored in the pci-dev struct.
4130          */
4131         old_vfs = pci_num_vf(adapter->pdev);
4132         if (old_vfs) {
4133                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4134                          old_vfs);
4135
4136                 adapter->pool_res.max_vfs =
4137                         pci_sriov_get_totalvfs(adapter->pdev);
4138                 adapter->num_vfs = old_vfs;
4139         }
4140
4141         return 0;
4142 }
4143
4144 static void be_alloc_sriov_res(struct be_adapter *adapter)
4145 {
4146         int old_vfs = pci_num_vf(adapter->pdev);
4147         u16 num_vf_qs;
4148         int status;
4149
4150         be_get_sriov_config(adapter);
4151
4152         if (!old_vfs)
4153                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4154
4155         /* When the HW is in an SRIOV capable configuration, the PF-pool
4156          * resources are given to the PF during driver load, if there are no
4157          * old VFs. This facility is not available in BE3 FW.
4158          * Also, this is done by the FW in the Lancer chip.
4159          */
4160         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4161                 num_vf_qs = be_calculate_vf_qs(adapter, 0);
4162                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4163                                                  num_vf_qs);
4164                 if (status)
4165                         dev_err(&adapter->pdev->dev,
4166                                 "Failed to optimize SRIOV resources\n");
4167         }
4168 }
4169
4170 static int be_get_resources(struct be_adapter *adapter)
4171 {
4172         struct device *dev = &adapter->pdev->dev;
4173         struct be_resources res = {0};
4174         int status;
4175
4176         if (BEx_chip(adapter)) {
4177                 BEx_get_resources(adapter, &res);
4178                 adapter->res = res;
4179         }
4180
4181         /* For Lancer, SH etc. read per-function resource limits from FW.
4182          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4183          * GET_PROFILE_CONFIG returns PCI-E related PF-pool limits.
4184          */
4185         if (!BEx_chip(adapter)) {
4186                 status = be_cmd_get_func_config(adapter, &res);
4187                 if (status)
4188                         return status;
4189
4190                 /* If a default RXQ must be created, we'll use up one RSSQ */
4191                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4192                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4193                         res.max_rss_qs -= 1;
4194
4195                 /* If RoCE may be enabled, stash away half the EQs for RoCE */
4196                 if (be_roce_supported(adapter))
4197                         res.max_evt_qs /= 2;
4198                 adapter->res = res;
4199         }
4200
4201         /* If FW supports RSS default queue, then skip creating non-RSS
4202          * queue for non-IP traffic.
4203          */
4204         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4205                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4206
4207         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4208                  be_max_txqs(adapter), be_max_rxqs(adapter),
4209                  be_max_rss(adapter), be_max_eqs(adapter),
4210                  be_max_vfs(adapter));
4211         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4212                  be_max_uc(adapter), be_max_mc(adapter),
4213                  be_max_vlans(adapter));
4214
4215         /* Sanitize cfg_num_qs based on HW and platform limits */
4216         adapter->cfg_num_qs = min_t(u16, netif_get_num_default_rss_queues(),
4217                                     be_max_qs(adapter));
4218         return 0;
4219 }
4220
4221 static int be_get_config(struct be_adapter *adapter)
4222 {
4223         int status, level;
4224         u16 profile_id;
4225
4226         status = be_cmd_get_cntl_attributes(adapter);
4227         if (status)
4228                 return status;
4229
4230         status = be_cmd_query_fw_cfg(adapter);
4231         if (status)
4232                 return status;
4233
4234         if (!lancer_chip(adapter) && be_physfn(adapter))
4235                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4236
4237         if (BEx_chip(adapter)) {
4238                 level = be_cmd_get_fw_log_level(adapter);
4239                 adapter->msg_enable =
4240                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4241         }
4242
4243         be_cmd_get_acpi_wol_cap(adapter);
4244
4245         be_cmd_query_port_name(adapter);
4246
4247         if (be_physfn(adapter)) {
4248                 status = be_cmd_get_active_profile(adapter, &profile_id);
4249                 if (!status)
4250                         dev_info(&adapter->pdev->dev,
4251                                  "Using profile 0x%x\n", profile_id);
4252         }
4253
4254         status = be_get_resources(adapter);
4255         if (status)
4256                 return status;
4257
4258         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4259                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4260         if (!adapter->pmac_id)
4261                 return -ENOMEM;
4262
4263         return 0;
4264 }
4265
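/* If the netdev does not have a MAC address yet, read the permanent MAC
 * from FW and use it as both the current and permanent address.
 */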
4266 static int be_mac_setup(struct be_adapter *adapter)
4267 {
4268         u8 mac[ETH_ALEN];
4269         int status;
4270
4271         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4272                 status = be_cmd_get_perm_mac(adapter, mac);
4273                 if (status)
4274                         return status;
4275
4276                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4277                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4278         }
4279
4280         return 0;
4281 }
4282
4283 static void be_schedule_worker(struct be_adapter *adapter)
4284 {
4285         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
4286         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4287 }
4288
4289 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4290 {
4291         schedule_delayed_work(&adapter->be_err_detection_work,
4292                               msecs_to_jiffies(delay));
4293         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4294 }
4295
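     /* Create the event queues, TX queues, RX completion queues and MCC
      * queues, and publish the real RX/TX queue counts to the stack.
      * The caller must hold rtnl_lock() for the netif_set_real_num_*() calls.
      */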
4296 static int be_setup_queues(struct be_adapter *adapter)
4297 {
4298         struct net_device *netdev = adapter->netdev;
4299         int status;
4300
4301         status = be_evt_queues_create(adapter);
4302         if (status)
4303                 goto err;
4304
4305         status = be_tx_qs_create(adapter);
4306         if (status)
4307                 goto err;
4308
4309         status = be_rx_cqs_create(adapter);
4310         if (status)
4311                 goto err;
4312
4313         status = be_mcc_queues_create(adapter);
4314         if (status)
4315                 goto err;
4316
4317         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4318         if (status)
4319                 goto err;
4320
4321         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4322         if (status)
4323                 goto err;
4324
4325         return 0;
4326 err:
4327         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4328         return status;
4329 }
4330
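     /* Create the interface object with all capability flags; the RSS flags
      * are dropped when only a single queue is configured. Filter flags are
      * enabled later in be_open().
      */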
4331 static int be_if_create(struct be_adapter *adapter)
4332 {
4333         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4334         u32 cap_flags = be_if_cap_flags(adapter);
4335         int status;
4336
4337         if (adapter->cfg_num_qs == 1)
4338                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4339
4340         en_flags &= cap_flags;
4341         /* will enable all the needed filter flags in be_open() */
4342         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4343                                   &adapter->if_handle, 0);
4344
4345         return status;
4346 }
4347
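     /* Tear down and re-create the interface and all queues (e.g. after a
      * queue-count or SR-IOV configuration change), re-opening the netdev if
      * it was running.
      */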
4348 int be_update_queues(struct be_adapter *adapter)
4349 {
4350         struct net_device *netdev = adapter->netdev;
4351         int status;
4352
4353         if (netif_running(netdev))
4354                 be_close(netdev);
4355
4356         be_cancel_worker(adapter);
4357
4358         /* If any vectors have been shared with RoCE, we cannot re-program
4359          * the MSIx table.
4360          */
4361         if (!adapter->num_msix_roce_vec)
4362                 be_msix_disable(adapter);
4363
4364         be_clear_queues(adapter);
4365         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4366         if (status)
4367                 return status;
4368
4369         if (!msix_enabled(adapter)) {
4370                 status = be_msix_enable(adapter);
4371                 if (status)
4372                         return status;
4373         }
4374
4375         status = be_if_create(adapter);
4376         if (status)
4377                 return status;
4378
4379         status = be_setup_queues(adapter);
4380         if (status)
4381                 return status;
4382
4383         be_schedule_worker(adapter);
4384
4385         if (netif_running(netdev))
4386                 status = be_open(netdev);
4387
4388         return status;
4389 }
4390
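     /* Parse the major number out of a "major.minor..." FW version string;
      * returns 0 if the string cannot be parsed.
      */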
4391 static inline int fw_major_num(const char *fw_ver)
4392 {
4393         int fw_major = 0, i;
4394
4395         i = sscanf(fw_ver, "%d.", &fw_major);
4396         if (i != 1)
4397                 return 0;
4398
4399         return fw_major;
4400 }
4401
4402 /* If any VFs are already enabled, don't FLR the PF */
4403 static bool be_reset_required(struct be_adapter *adapter)
4404 {
4405         return pci_num_vf(adapter->pdev) ? false : true;
4406 }
4407
4408 /* Wait for the FW to be ready and perform the required initialization */
4409 static int be_func_init(struct be_adapter *adapter)
4410 {
4411         int status;
4412
4413         status = be_fw_wait_ready(adapter);
4414         if (status)
4415                 return status;
4416
4417         if (be_reset_required(adapter)) {
4418                 status = be_cmd_reset_function(adapter);
4419                 if (status)
4420                         return status;
4421
4422                 /* Wait for interrupts to quiesce after an FLR */
4423                 msleep(100);
4424
4425                 /* We can clear all errors when function reset succeeds */
4426                 be_clear_error(adapter, BE_CLEAR_ALL);
4427         }
4428
4429         /* Tell FW we're ready to fire cmds */
4430         status = be_cmd_fw_init(adapter);
4431         if (status)
4432                 return status;
4433
4434         /* Allow interrupts for other ULPs running on NIC function */
4435         be_intr_set(adapter, true);
4436
4437         return 0;
4438 }
4439
4440 static int be_setup(struct be_adapter *adapter)
4441 {
4442         struct device *dev = &adapter->pdev->dev;
4443         int status;
4444
4445         status = be_func_init(adapter);
4446         if (status)
4447                 return status;
4448
4449         be_setup_init(adapter);
4450
4451         if (!lancer_chip(adapter))
4452                 be_cmd_req_native_mode(adapter);
4453
4454         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4455          * for issuing profile-related cmds
4456          */
4457         if (!BEx_chip(adapter)) {
4458                 status = be_cmd_get_func_config(adapter, NULL);
4459                 if (status)
4460                         return status;
4461         }
4462
4463         if (!BE2_chip(adapter) && be_physfn(adapter))
4464                 be_alloc_sriov_res(adapter);
4465
4466         status = be_get_config(adapter);
4467         if (status)
4468                 goto err;
4469
4470         status = be_msix_enable(adapter);
4471         if (status)
4472                 goto err;
4473
4474         /* will enable all the needed filter flags in be_open() */
4475         status = be_if_create(adapter);
4476         if (status)
4477                 goto err;
4478
4479         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4480         rtnl_lock();
4481         status = be_setup_queues(adapter);
4482         rtnl_unlock();
4483         if (status)
4484                 goto err;
4485
4486         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4487
4488         status = be_mac_setup(adapter);
4489         if (status)
4490                 goto err;
4491
4492         be_cmd_get_fw_ver(adapter);
4493         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4494
4495         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4496                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4497                         adapter->fw_ver);
4498                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4499         }
4500
4501         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4502                                          adapter->rx_fc);
4503         if (status)
4504                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4505                                         &adapter->rx_fc);
4506
4507         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4508                  adapter->tx_fc, adapter->rx_fc);
4509
4510         if (be_physfn(adapter))
4511                 be_cmd_set_logical_link_config(adapter,
4512                                                IFLA_VF_LINK_STATE_AUTO, 0);
4513
4514         if (adapter->num_vfs)
4515                 be_vf_setup(adapter);
4516
4517         status = be_cmd_get_phy_info(adapter);
4518         if (!status && be_pause_supported(adapter))
4519                 adapter->phy.fc_autoneg = 1;
4520
4521         be_schedule_worker(adapter);
4522         adapter->flags |= BE_FLAGS_SETUP_DONE;
4523         return 0;
4524 err:
4525         be_clear(adapter);
4526         return status;
4527 }
4528
4529 #ifdef CONFIG_NET_POLL_CONTROLLER
4530 static void be_netpoll(struct net_device *netdev)
4531 {
4532         struct be_adapter *adapter = netdev_priv(netdev);
4533         struct be_eq_obj *eqo;
4534         int i;
4535
4536         for_all_evt_queues(adapter, eqo, i) {
4537                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4538                 napi_schedule(&eqo->napi);
4539         }
4540 }
4541 #endif
4542
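     /* Fetch the named firmware image via request_firmware() and flash it
      * through the Lancer- or BE-specific download path; firmware loads are
      * refused while the interface is down.
      */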
4543 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4544 {
4545         const struct firmware *fw;
4546         int status;
4547
4548         if (!netif_running(adapter->netdev)) {
4549                 dev_err(&adapter->pdev->dev,
4550                         "Firmware load not allowed (interface is down)\n");
4551                 return -ENETDOWN;
4552         }
4553
4554         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4555         if (status)
4556                 goto fw_exit;
4557
4558         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4559
4560         if (lancer_chip(adapter))
4561                 status = lancer_fw_download(adapter, fw);
4562         else
4563                 status = be_fw_download(adapter, fw);
4564
4565         if (!status)
4566                 be_cmd_get_fw_ver(adapter);
4567
4568 fw_exit:
4569         release_firmware(fw);
4570         return status;
4571 }
4572
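     /* ndo_bridge_setlink handler: program the e-switch port forwarding mode
      * (VEB or VEPA) requested via the IFLA_BRIDGE_MODE attribute; only
      * supported when SR-IOV is enabled.
      */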
4573 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4574                                  u16 flags)
4575 {
4576         struct be_adapter *adapter = netdev_priv(dev);
4577         struct nlattr *attr, *br_spec;
4578         int rem;
4579         int status = 0;
4580         u16 mode = 0;
4581
4582         if (!sriov_enabled(adapter))
4583                 return -EOPNOTSUPP;
4584
4585         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4586         if (!br_spec)
4587                 return -EINVAL;
4588
4589         nla_for_each_nested(attr, br_spec, rem) {
4590                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4591                         continue;
4592
4593                 if (nla_len(attr) < sizeof(mode))
4594                         return -EINVAL;
4595
4596                 mode = nla_get_u16(attr);
4597                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4598                         return -EOPNOTSUPP;
4599
4600                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4601                         return -EINVAL;
4602
4603                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4604                                                adapter->if_handle,
4605                                                mode == BRIDGE_MODE_VEPA ?
4606                                                PORT_FWD_TYPE_VEPA :
4607                                                PORT_FWD_TYPE_VEB, 0);
4608                 if (status)
4609                         goto err;
4610
4611                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4612                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4613
4614                 return status;
4615         }
4616 err:
4617         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4618                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4619
4620         return status;
4621 }
4622
4623 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4624                                  struct net_device *dev, u32 filter_mask,
4625                                  int nlflags)
4626 {
4627         struct be_adapter *adapter = netdev_priv(dev);
4628         int status = 0;
4629         u8 hsw_mode;
4630
4631         /* BE and Lancer chips support VEB mode only */
4632         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4633                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4634                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4635                         return 0;
4636                 hsw_mode = PORT_FWD_TYPE_VEB;
4637         } else {
4638                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4639                                                adapter->if_handle, &hsw_mode,
4640                                                NULL);
4641                 if (status)
4642                         return 0;
4643
4644                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4645                         return 0;
4646         }
4647
4648         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4649                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4650                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4651                                        0, 0, nlflags, filter_mask, NULL);
4652 }
4653
4654 #ifdef CONFIG_BE2NET_VXLAN
4655 /* VxLAN offload Notes:
4656  *
4657  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4658  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4659  * is expected to work across all types of IP tunnels once exported. Skyhawk
4660  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4661  * offloads in hw_enc_features only when a VxLAN port is added. If other
4662  * (non-VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads
4663  * for those other tunnels are unexported on the fly through ndo_features_check().
4664  *
4665  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4666  * adds more than one port, disable offloads and don't re-enable them again
4667  * until after all the tunnels are removed.
4668  */
4669 static void be_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family,
4670                               __be16 port)
4671 {
4672         struct be_adapter *adapter = netdev_priv(netdev);
4673         struct device *dev = &adapter->pdev->dev;
4674         int status;
4675
4676         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4677                 return;
4678
4679         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4680                 adapter->vxlan_port_aliases++;
4681                 return;
4682         }
4683
4684         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4685                 dev_info(dev,
4686                          "Only one UDP port supported for VxLAN offloads\n");
4687                 dev_info(dev, "Disabling VxLAN offloads\n");
4688                 adapter->vxlan_port_count++;
4689                 goto err;
4690         }
4691
4692         if (adapter->vxlan_port_count++ >= 1)
4693                 return;
4694
4695         status = be_cmd_manage_iface(adapter, adapter->if_handle,
4696                                      OP_CONVERT_NORMAL_TO_TUNNEL);
4697         if (status) {
4698                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4699                 goto err;
4700         }
4701
4702         status = be_cmd_set_vxlan_port(adapter, port);
4703         if (status) {
4704                 dev_warn(dev, "Failed to add VxLAN port\n");
4705                 goto err;
4706         }
4707         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4708         adapter->vxlan_port = port;
4709
4710         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4711                                    NETIF_F_TSO | NETIF_F_TSO6 |
4712                                    NETIF_F_GSO_UDP_TUNNEL;
4713         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4714         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4715
4716         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4717                  be16_to_cpu(port));
4718         return;
4719 err:
4720         be_disable_vxlan_offloads(adapter);
4721 }
4722
4723 static void be_del_vxlan_port(struct net_device *netdev, sa_family_t sa_family,
4724                               __be16 port)
4725 {
4726         struct be_adapter *adapter = netdev_priv(netdev);
4727
4728         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4729                 return;
4730
4731         if (adapter->vxlan_port != port)
4732                 goto done;
4733
4734         if (adapter->vxlan_port_aliases) {
4735                 adapter->vxlan_port_aliases--;
4736                 return;
4737         }
4738
4739         be_disable_vxlan_offloads(adapter);
4740
4741         dev_info(&adapter->pdev->dev,
4742                  "Disabled VxLAN offloads for UDP port %d\n",
4743                  be16_to_cpu(port));
4744 done:
4745         adapter->vxlan_port_count--;
4746 }
4747
4748 static netdev_features_t be_features_check(struct sk_buff *skb,
4749                                            struct net_device *dev,
4750                                            netdev_features_t features)
4751 {
4752         struct be_adapter *adapter = netdev_priv(dev);
4753         u8 l4_hdr = 0;
4754
4755         /* The code below restricts offload features for some tunneled packets.
4756          * Offload features for normal (non tunnel) packets are unchanged.
4757          */
4758         if (!skb->encapsulation ||
4759             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
4760                 return features;
4761
4762         /* It's an encapsulated packet and VxLAN offloads are enabled. We
4763          * should disable tunnel offload features if it's not a VxLAN packet,
4764          * as tunnel offloads have been enabled only for VxLAN. This is done to
4765          * allow other tunneled traffic like GRE to work fine while VxLAN
4766          * offloads are configured in Skyhawk-R.
4767          */
4768         switch (vlan_get_protocol(skb)) {
4769         case htons(ETH_P_IP):
4770                 l4_hdr = ip_hdr(skb)->protocol;
4771                 break;
4772         case htons(ETH_P_IPV6):
4773                 l4_hdr = ipv6_hdr(skb)->nexthdr;
4774                 break;
4775         default:
4776                 return features;
4777         }
4778
4779         if (l4_hdr != IPPROTO_UDP ||
4780             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
4781             skb->inner_protocol != htons(ETH_P_TEB) ||
4782             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
4783             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
4784                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4785
4786         return features;
4787 }
4788 #endif
4789
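     /* Build a physical port id that is unique across adapters by combining
      * the HBA port number with the controller's serial-number words.
      */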
4790 static int be_get_phys_port_id(struct net_device *dev,
4791                                struct netdev_phys_item_id *ppid)
4792 {
4793         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
4794         struct be_adapter *adapter = netdev_priv(dev);
4795         u8 *id;
4796
4797         if (MAX_PHYS_ITEM_ID_LEN < id_len)
4798                 return -ENOSPC;
4799
4800         ppid->id[0] = adapter->hba_port_num + 1;
4801         id = &ppid->id[1];
4802         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
4803              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
4804                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
4805
4806         ppid->id_len = id_len;
4807
4808         return 0;
4809 }
4810
4811 static const struct net_device_ops be_netdev_ops = {
4812         .ndo_open               = be_open,
4813         .ndo_stop               = be_close,
4814         .ndo_start_xmit         = be_xmit,
4815         .ndo_set_rx_mode        = be_set_rx_mode,
4816         .ndo_set_mac_address    = be_mac_addr_set,
4817         .ndo_change_mtu         = be_change_mtu,
4818         .ndo_get_stats64        = be_get_stats64,
4819         .ndo_validate_addr      = eth_validate_addr,
4820         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
4821         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
4822         .ndo_set_vf_mac         = be_set_vf_mac,
4823         .ndo_set_vf_vlan        = be_set_vf_vlan,
4824         .ndo_set_vf_rate        = be_set_vf_tx_rate,
4825         .ndo_get_vf_config      = be_get_vf_config,
4826         .ndo_set_vf_link_state  = be_set_vf_link_state,
4827         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
4828 #ifdef CONFIG_NET_POLL_CONTROLLER
4829         .ndo_poll_controller    = be_netpoll,
4830 #endif
4831         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
4832         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
4833 #ifdef CONFIG_NET_RX_BUSY_POLL
4834         .ndo_busy_poll          = be_busy_poll,
4835 #endif
4836 #ifdef CONFIG_BE2NET_VXLAN
4837         .ndo_add_vxlan_port     = be_add_vxlan_port,
4838         .ndo_del_vxlan_port     = be_del_vxlan_port,
4839         .ndo_features_check     = be_features_check,
4840 #endif
4841         .ndo_get_phys_port_id   = be_get_phys_port_id,
4842 };
4843
4844 static void be_netdev_init(struct net_device *netdev)
4845 {
4846         struct be_adapter *adapter = netdev_priv(netdev);
4847
4848         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4849                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
4850                 NETIF_F_HW_VLAN_CTAG_TX;
4851         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
4852                 netdev->hw_features |= NETIF_F_RXHASH;
4853
4854         netdev->features |= netdev->hw_features |
4855                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
4856
4857         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4858                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4859
4860         netdev->priv_flags |= IFF_UNICAST_FLT;
4861
4862         netdev->flags |= IFF_MULTICAST;
4863
4864         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
4865
4866         netdev->netdev_ops = &be_netdev_ops;
4867
4868         netdev->ethtool_ops = &be_ethtool_ops;
4869 }
4870
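     /* Detach the netdev, close it if it was running and tear down all
      * adapter resources; used by the suspend, EEH and error-recovery paths.
      */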
4871 static void be_cleanup(struct be_adapter *adapter)
4872 {
4873         struct net_device *netdev = adapter->netdev;
4874
4875         rtnl_lock();
4876         netif_device_detach(netdev);
4877         if (netif_running(netdev))
4878                 be_close(netdev);
4879         rtnl_unlock();
4880
4881         be_clear(adapter);
4882 }
4883
4884 static int be_resume(struct be_adapter *adapter)
4885 {
4886         struct net_device *netdev = adapter->netdev;
4887         int status;
4888
4889         status = be_setup(adapter);
4890         if (status)
4891                 return status;
4892
4893         rtnl_lock();
4894         if (netif_running(netdev))
4895                 status = be_open(netdev);
4896         rtnl_unlock();
4897
4898         if (status)
4899                 return status;
4900
4901         netif_device_attach(netdev);
4902
4903         return 0;
4904 }
4905
4906 static int be_err_recover(struct be_adapter *adapter)
4907 {
4908         int status;
4909
4910         /* Error recovery is supported only on Lancer as of now */
4911         if (!lancer_chip(adapter))
4912                 return -EIO;
4913
4914         /* Wait for the adapter to reach a quiescent state before
4915          * destroying queues
4916          */
4917         status = be_fw_wait_ready(adapter);
4918         if (status)
4919                 goto err;
4920
4921         be_cleanup(adapter);
4922
4923         status = be_resume(adapter);
4924         if (status)
4925                 goto err;
4926
4927         return 0;
4928 err:
4929         return status;
4930 }
4931
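     /* Periodic error-detection worker: polls for HW errors and, when one is
      * found, attempts recovery. On failure it keeps retrying (every second
      * on VFs, and up to MAX_ERR_RECOVERY_RETRY_COUNT times with a longer
      * delay on PFs) before giving up.
      */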
4932 static void be_err_detection_task(struct work_struct *work)
4933 {
4934         struct be_adapter *adapter =
4935                                 container_of(work, struct be_adapter,
4936                                              be_err_detection_work.work);
4937         struct device *dev = &adapter->pdev->dev;
4938         int recovery_status;
4939         int delay = ERR_DETECTION_DELAY;
4940
4941         be_detect_error(adapter);
4942
4943         if (be_check_error(adapter, BE_ERROR_HW))
4944                 recovery_status = be_err_recover(adapter);
4945         else
4946                 goto reschedule_task;
4947
4948         if (!recovery_status) {
4949                 adapter->recovery_retries = 0;
4950                 dev_info(dev, "Adapter recovery successful\n");
4951                 goto reschedule_task;
4952         } else if (be_virtfn(adapter)) {
4953                 /* For VFs, check if the PF has allocated resources
4954                  * every second.
4955                  */
4956                 dev_err(dev, "Re-trying adapter recovery\n");
4957                 goto reschedule_task;
4958         } else if (adapter->recovery_retries++ <
4959                    MAX_ERR_RECOVERY_RETRY_COUNT) {
4960                 /* In case of another error during recovery, it takes 30 sec
4961                  * for the adapter to come out of error. Retry error recovery after
4962                  * this time interval.
4963                  */
4964                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
4965                 delay = ERR_RECOVERY_RETRY_DELAY;
4966                 goto reschedule_task;
4967         } else {
4968                 dev_err(dev, "Adapter recovery failed\n");
4969         }
4970
4971         return;
4972 reschedule_task:
4973         be_schedule_err_detection(adapter, delay);
4974 }
4975
4976 static void be_log_sfp_info(struct be_adapter *adapter)
4977 {
4978         int status;
4979
4980         status = be_cmd_query_sfp_info(adapter);
4981         if (!status) {
4982                 dev_err(&adapter->pdev->dev,
4983                         "Port %c: %s Vendor: %s part no: %s\n",
4984                         adapter->port_name,
4985                         be_misconfig_evt_port_state[adapter->phy_state],
4986                         adapter->phy.vendor_name,
4987                         adapter->phy.vendor_pn);
4988         }
4989         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
4990 }
4991
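     /* One-second housekeeping worker: reaps MCC completions while the
      * interface is down, refreshes statistics and die temperature,
      * replenishes starved RX queues and updates EQ delays where needed.
      */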
4992 static void be_worker(struct work_struct *work)
4993 {
4994         struct be_adapter *adapter =
4995                 container_of(work, struct be_adapter, work.work);
4996         struct be_rx_obj *rxo;
4997         int i;
4998
4999         /* when interrupts are not yet enabled, just reap any pending
5000          * mcc completions
5001          */
5002         if (!netif_running(adapter->netdev)) {
5003                 local_bh_disable();
5004                 be_process_mcc(adapter);
5005                 local_bh_enable();
5006                 goto reschedule;
5007         }
5008
5009         if (!adapter->stats_cmd_sent) {
5010                 if (lancer_chip(adapter))
5011                         lancer_cmd_get_pport_stats(adapter,
5012                                                    &adapter->stats_cmd);
5013                 else
5014                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5015         }
5016
5017         if (be_physfn(adapter) &&
5018             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5019                 be_cmd_get_die_temperature(adapter);
5020
5021         for_all_rx_queues(adapter, rxo, i) {
5022                 /* Replenish RX-queues starved due to memory
5023                  * allocation failures.
5024                  */
5025                 if (rxo->rx_post_starved)
5026                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5027         }
5028
5029         /* EQ-delay update for Skyhawk is done while notifying EQ */
5030         if (!skyhawk_chip(adapter))
5031                 be_eqd_update(adapter, false);
5032
5033         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5034                 be_log_sfp_info(adapter);
5035
5036 reschedule:
5037         adapter->work_counter++;
5038         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
5039 }
5040
5041 static void be_unmap_pci_bars(struct be_adapter *adapter)
5042 {
5043         if (adapter->csr)
5044                 pci_iounmap(adapter->pdev, adapter->csr);
5045         if (adapter->db)
5046                 pci_iounmap(adapter->pdev, adapter->db);
5047         if (adapter->pcicfg && adapter->pcicfg_mapped)
5048                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5049 }
5050
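     /* The doorbell BAR is BAR 0 on Lancer and on VFs, and BAR 4 otherwise */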
5051 static int db_bar(struct be_adapter *adapter)
5052 {
5053         if (lancer_chip(adapter) || be_virtfn(adapter))
5054                 return 0;
5055         else
5056                 return 4;
5057 }
5058
5059 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5060 {
5061         if (skyhawk_chip(adapter)) {
5062                 adapter->roce_db.size = 4096;
5063                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5064                                                               db_bar(adapter));
5065                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5066                                                                db_bar(adapter));
5067         }
5068         return 0;
5069 }
5070
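     /* Map the PCI BARs used by the driver: CSR (BEx PFs only), the doorbell
      * BAR, and PCICFG (mapped on Skyhawk/BEx PFs, derived from the doorbell
      * mapping on VFs).
      */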
5071 static int be_map_pci_bars(struct be_adapter *adapter)
5072 {
5073         struct pci_dev *pdev = adapter->pdev;
5074         u8 __iomem *addr;
5075         u32 sli_intf;
5076
5077         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5078         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5079                                 SLI_INTF_FAMILY_SHIFT;
5080         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5081
5082         if (BEx_chip(adapter) && be_physfn(adapter)) {
5083                 adapter->csr = pci_iomap(pdev, 2, 0);
5084                 if (!adapter->csr)
5085                         return -ENOMEM;
5086         }
5087
5088         addr = pci_iomap(pdev, db_bar(adapter), 0);
5089         if (!addr)
5090                 goto pci_map_err;
5091         adapter->db = addr;
5092
5093         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5094                 if (be_physfn(adapter)) {
5095                         /* PCICFG is the 2nd BAR in BE2 */
5096                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5097                         if (!addr)
5098                                 goto pci_map_err;
5099                         adapter->pcicfg = addr;
5100                         adapter->pcicfg_mapped = true;
5101                 } else {
5102                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5103                         adapter->pcicfg_mapped = false;
5104                 }
5105         }
5106
5107         be_roce_map_pci_bars(adapter);
5108         return 0;
5109
5110 pci_map_err:
5111         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5112         be_unmap_pci_bars(adapter);
5113         return -ENOMEM;
5114 }
5115
5116 static void be_drv_cleanup(struct be_adapter *adapter)
5117 {
5118         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5119         struct device *dev = &adapter->pdev->dev;
5120
5121         if (mem->va)
5122                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5123
5124         mem = &adapter->rx_filter;
5125         if (mem->va)
5126                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5127
5128         mem = &adapter->stats_cmd;
5129         if (mem->va)
5130                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5131 }
5132
5133 /* Allocate and initialize various fields in be_adapter struct */
5134 static int be_drv_init(struct be_adapter *adapter)
5135 {
5136         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5137         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5138         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5139         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5140         struct device *dev = &adapter->pdev->dev;
5141         int status = 0;
5142
5143         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5144         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5145                                                  &mbox_mem_alloc->dma,
5146                                                  GFP_KERNEL);
5147         if (!mbox_mem_alloc->va)
5148                 return -ENOMEM;
5149
5150         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5151         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5152         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5153
5154         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5155         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5156                                             &rx_filter->dma, GFP_KERNEL);
5157         if (!rx_filter->va) {
5158                 status = -ENOMEM;
5159                 goto free_mbox;
5160         }
5161
5162         if (lancer_chip(adapter))
5163                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5164         else if (BE2_chip(adapter))
5165                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5166         else if (BE3_chip(adapter))
5167                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5168         else
5169                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5170         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5171                                             &stats_cmd->dma, GFP_KERNEL);
5172         if (!stats_cmd->va) {
5173                 status = -ENOMEM;
5174                 goto free_rx_filter;
5175         }
5176
5177         mutex_init(&adapter->mbox_lock);
5178         spin_lock_init(&adapter->mcc_lock);
5179         spin_lock_init(&adapter->mcc_cq_lock);
5180         init_completion(&adapter->et_cmd_compl);
5181
5182         pci_save_state(adapter->pdev);
5183
5184         INIT_DELAYED_WORK(&adapter->work, be_worker);
5185         INIT_DELAYED_WORK(&adapter->be_err_detection_work,
5186                           be_err_detection_task);
5187
5188         adapter->rx_fc = true;
5189         adapter->tx_fc = true;
5190
5191         /* Must be a power of 2 or else MODULO will BUG_ON */
5192         adapter->be_get_temp_freq = 64;
5193
5194         return 0;
5195
5196 free_rx_filter:
5197         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5198 free_mbox:
5199         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5200                           mbox_mem_alloc->dma);
5201         return status;
5202 }
5203
5204 static void be_remove(struct pci_dev *pdev)
5205 {
5206         struct be_adapter *adapter = pci_get_drvdata(pdev);
5207
5208         if (!adapter)
5209                 return;
5210
5211         be_roce_dev_remove(adapter);
5212         be_intr_set(adapter, false);
5213
5214         be_cancel_err_detection(adapter);
5215
5216         unregister_netdev(adapter->netdev);
5217
5218         be_clear(adapter);
5219
5220         /* Tell FW we're done firing cmds */
5221         be_cmd_fw_clean(adapter);
5222
5223         be_unmap_pci_bars(adapter);
5224         be_drv_cleanup(adapter);
5225
5226         pci_disable_pcie_error_reporting(pdev);
5227
5228         pci_release_regions(pdev);
5229         pci_disable_device(pdev);
5230
5231         free_netdev(adapter->netdev);
5232 }
5233
5234 static ssize_t be_hwmon_show_temp(struct device *dev,
5235                                   struct device_attribute *dev_attr,
5236                                   char *buf)
5237 {
5238         struct be_adapter *adapter = dev_get_drvdata(dev);
5239
5240         /* Unit: millidegree Celsius */
5241         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5242                 return -EIO;
5243         else
5244                 return sprintf(buf, "%u\n",
5245                                adapter->hwmon_info.be_on_die_temp * 1000);
5246 }
5247
5248 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5249                           be_hwmon_show_temp, NULL, 1);
5250
5251 static struct attribute *be_hwmon_attrs[] = {
5252         &sensor_dev_attr_temp1_input.dev_attr.attr,
5253         NULL
5254 };
5255
5256 ATTRIBUTE_GROUPS(be_hwmon);
5257
5258 static char *mc_name(struct be_adapter *adapter)
5259 {
5260         char *str = ""; /* default */
5261
5262         switch (adapter->mc_type) {
5263         case UMC:
5264                 str = "UMC";
5265                 break;
5266         case FLEX10:
5267                 str = "FLEX10";
5268                 break;
5269         case vNIC1:
5270                 str = "vNIC-1";
5271                 break;
5272         case nPAR:
5273                 str = "nPAR";
5274                 break;
5275         case UFP:
5276                 str = "UFP";
5277                 break;
5278         case vNIC2:
5279                 str = "vNIC-2";
5280                 break;
5281         default:
5282                 str = "";
5283         }
5284
5285         return str;
5286 }
5287
5288 static inline char *func_name(struct be_adapter *adapter)
5289 {
5290         return be_physfn(adapter) ? "PF" : "VF";
5291 }
5292
5293 static inline char *nic_name(struct pci_dev *pdev)
5294 {
5295         switch (pdev->device) {
5296         case OC_DEVICE_ID1:
5297                 return OC_NAME;
5298         case OC_DEVICE_ID2:
5299                 return OC_NAME_BE;
5300         case OC_DEVICE_ID3:
5301         case OC_DEVICE_ID4:
5302                 return OC_NAME_LANCER;
5303         case BE_DEVICE_ID2:
5304                 return BE3_NAME;
5305         case OC_DEVICE_ID5:
5306         case OC_DEVICE_ID6:
5307                 return OC_NAME_SH;
5308         default:
5309                 return BE_NAME;
5310         }
5311 }
5312
5313 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5314 {
5315         struct be_adapter *adapter;
5316         struct net_device *netdev;
5317         int status = 0;
5318
5319         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5320
5321         status = pci_enable_device(pdev);
5322         if (status)
5323                 goto do_none;
5324
5325         status = pci_request_regions(pdev, DRV_NAME);
5326         if (status)
5327                 goto disable_dev;
5328         pci_set_master(pdev);
5329
5330         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5331         if (!netdev) {
5332                 status = -ENOMEM;
5333                 goto rel_reg;
5334         }
5335         adapter = netdev_priv(netdev);
5336         adapter->pdev = pdev;
5337         pci_set_drvdata(pdev, adapter);
5338         adapter->netdev = netdev;
5339         SET_NETDEV_DEV(netdev, &pdev->dev);
5340
5341         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5342         if (!status) {
5343                 netdev->features |= NETIF_F_HIGHDMA;
5344         } else {
5345                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5346                 if (status) {
5347                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5348                         goto free_netdev;
5349                 }
5350         }
5351
5352         status = pci_enable_pcie_error_reporting(pdev);
5353         if (!status)
5354                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5355
5356         status = be_map_pci_bars(adapter);
5357         if (status)
5358                 goto free_netdev;
5359
5360         status = be_drv_init(adapter);
5361         if (status)
5362                 goto unmap_bars;
5363
5364         status = be_setup(adapter);
5365         if (status)
5366                 goto drv_cleanup;
5367
5368         be_netdev_init(netdev);
5369         status = register_netdev(netdev);
5370         if (status != 0)
5371                 goto unsetup;
5372
5373         be_roce_dev_add(adapter);
5374
5375         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5376
5377         /* On-die temperature is not supported for VFs. */
5378         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5379                 adapter->hwmon_info.hwmon_dev =
5380                         devm_hwmon_device_register_with_groups(&pdev->dev,
5381                                                                DRV_NAME,
5382                                                                adapter,
5383                                                                be_hwmon_groups);
5384                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5385         }
5386
5387         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5388                  func_name(adapter), mc_name(adapter), adapter->port_name);
5389
5390         return 0;
5391
5392 unsetup:
5393         be_clear(adapter);
5394 drv_cleanup:
5395         be_drv_cleanup(adapter);
5396 unmap_bars:
5397         be_unmap_pci_bars(adapter);
5398 free_netdev:
5399         free_netdev(netdev);
5400 rel_reg:
5401         pci_release_regions(pdev);
5402 disable_dev:
5403         pci_disable_device(pdev);
5404 do_none:
5405         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5406         return status;
5407 }
5408
5409 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5410 {
5411         struct be_adapter *adapter = pci_get_drvdata(pdev);
5412
5413         if (adapter->wol_en)
5414                 be_setup_wol(adapter, true);
5415
5416         be_intr_set(adapter, false);
5417         be_cancel_err_detection(adapter);
5418
5419         be_cleanup(adapter);
5420
5421         pci_save_state(pdev);
5422         pci_disable_device(pdev);
5423         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5424         return 0;
5425 }
5426
5427 static int be_pci_resume(struct pci_dev *pdev)
5428 {
5429         struct be_adapter *adapter = pci_get_drvdata(pdev);
5430         int status = 0;
5431
5432         status = pci_enable_device(pdev);
5433         if (status)
5434                 return status;
5435
5436         pci_restore_state(pdev);
5437
5438         status = be_resume(adapter);
5439         if (status)
5440                 return status;
5441
5442         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5443
5444         if (adapter->wol_en)
5445                 be_setup_wol(adapter, false);
5446
5447         return 0;
5448 }
5449
5450 /*
5451  * An FLR will stop BE from DMAing any data.
5452  */
5453 static void be_shutdown(struct pci_dev *pdev)
5454 {
5455         struct be_adapter *adapter = pci_get_drvdata(pdev);
5456
5457         if (!adapter)
5458                 return;
5459
5460         be_roce_dev_shutdown(adapter);
5461         cancel_delayed_work_sync(&adapter->work);
5462         be_cancel_err_detection(adapter);
5463
5464         netif_device_detach(adapter->netdev);
5465
5466         be_cmd_reset_function(adapter);
5467
5468         pci_disable_device(pdev);
5469 }
5470
5471 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5472                                             pci_channel_state_t state)
5473 {
5474         struct be_adapter *adapter = pci_get_drvdata(pdev);
5475
5476         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5477
5478         be_roce_dev_remove(adapter);
5479
5480         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5481                 be_set_error(adapter, BE_ERROR_EEH);
5482
5483                 be_cancel_err_detection(adapter);
5484
5485                 be_cleanup(adapter);
5486         }
5487
5488         if (state == pci_channel_io_perm_failure)
5489                 return PCI_ERS_RESULT_DISCONNECT;
5490
5491         pci_disable_device(pdev);
5492
5493         /* The error could cause the FW to trigger a flash debug dump.
5494          * Resetting the card while flash dump is in progress
5495          * can cause it not to recover; wait for it to finish.
5496          * Wait only for the first function, as the wait is needed only
5497          * once per adapter.
5498          */
5499         if (pdev->devfn == 0)
5500                 ssleep(30);
5501
5502         return PCI_ERS_RESULT_NEED_RESET;
5503 }
5504
5505 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5506 {
5507         struct be_adapter *adapter = pci_get_drvdata(pdev);
5508         int status;
5509
5510         dev_info(&adapter->pdev->dev, "EEH reset\n");
5511
5512         status = pci_enable_device(pdev);
5513         if (status)
5514                 return PCI_ERS_RESULT_DISCONNECT;
5515
5516         pci_set_master(pdev);
5517         pci_restore_state(pdev);
5518
5519         /* Check if card is ok and fw is ready */
5520         dev_info(&adapter->pdev->dev,
5521                  "Waiting for FW to be ready after EEH reset\n");
5522         status = be_fw_wait_ready(adapter);
5523         if (status)
5524                 return PCI_ERS_RESULT_DISCONNECT;
5525
5526         pci_cleanup_aer_uncorrect_error_status(pdev);
5527         be_clear_error(adapter, BE_CLEAR_ALL);
5528         return PCI_ERS_RESULT_RECOVERED;
5529 }
5530
5531 static void be_eeh_resume(struct pci_dev *pdev)
5532 {
5533         int status = 0;
5534         struct be_adapter *adapter = pci_get_drvdata(pdev);
5535
5536         dev_info(&adapter->pdev->dev, "EEH resume\n");
5537
5538         pci_save_state(pdev);
5539
5540         status = be_resume(adapter);
5541         if (status)
5542                 goto err;
5543
5544         be_roce_dev_add(adapter);
5545
5546         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5547         return;
5548 err:
5549         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
5550 }
5551
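     /* sriov_configure hook, invoked when VFs are enabled or disabled via
      * sysfs: re-distributes pool resources on Skyhawk, re-creates the queues
      * and then sets up or clears the VFs.
      */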
5552 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
5553 {
5554         struct be_adapter *adapter = pci_get_drvdata(pdev);
5555         u16 num_vf_qs;
5556         int status;
5557
5558         if (!num_vfs)
5559                 be_vf_clear(adapter);
5560
5561         adapter->num_vfs = num_vfs;
5562
5563         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
5564                 dev_warn(&pdev->dev,
5565                          "Cannot disable VFs while they are assigned\n");
5566                 return -EBUSY;
5567         }
5568
5569         /* When the HW is in an SR-IOV-capable configuration, the PF-pool
5570          * resources are equally distributed across the max number of VFs.
5571          * The user may request that only a subset of the max VFs be enabled.
5572          * Based on num_vfs, redistribute the resources across num_vfs so that
5573          * each VF has access to a greater share of the resources.
5574          * This facility is not available in BE3 FW.
5575          * Also, this is done by the FW on the Lancer chip.
5576          */
5577         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
5578                 num_vf_qs = be_calculate_vf_qs(adapter, adapter->num_vfs);
5579                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
5580                                                  adapter->num_vfs, num_vf_qs);
5581                 if (status)
5582                         dev_err(&pdev->dev,
5583                                 "Failed to optimize SR-IOV resources\n");
5584         }
5585
5586         status = be_get_resources(adapter);
5587         if (status)
5588                 return be_cmd_status(status);
5589
5590         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
5591         rtnl_lock();
5592         status = be_update_queues(adapter);
5593         rtnl_unlock();
5594         if (status)
5595                 return be_cmd_status(status);
5596
5597         if (adapter->num_vfs)
5598                 status = be_vf_setup(adapter);
5599
5600         if (!status)
5601                 return adapter->num_vfs;
5602
5603         return 0;
5604 }
5605
5606 static const struct pci_error_handlers be_eeh_handlers = {
5607         .error_detected = be_eeh_err_detected,
5608         .slot_reset = be_eeh_reset,
5609         .resume = be_eeh_resume,
5610 };
5611
5612 static struct pci_driver be_driver = {
5613         .name = DRV_NAME,
5614         .id_table = be_dev_ids,
5615         .probe = be_probe,
5616         .remove = be_remove,
5617         .suspend = be_suspend,
5618         .resume = be_pci_resume,
5619         .shutdown = be_shutdown,
5620         .sriov_configure = be_pci_sriov_configure,
5621         .err_handler = &be_eeh_handlers
5622 };
5623
5624 static int __init be_init_module(void)
5625 {
5626         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
5627             rx_frag_size != 2048) {
5628                 printk(KERN_WARNING DRV_NAME
5629                         " : Module param rx_frag_size must be 2048/4096/8192."
5630                         " Using 2048\n");
5631                 rx_frag_size = 2048;
5632         }
5633
5634         if (num_vfs > 0) {
5635                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
5636                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
5637         }
5638
5639         return pci_register_driver(&be_driver);
5640 }
5641 module_init(be_init_module);
5642
5643 static void __exit be_exit_module(void)
5644 {
5645         pci_unregister_driver(&be_driver);
5646 }
5647 module_exit(be_exit_module);