243d43bfd97534a92ecdf43fe6cdbb9d73da5262
[cascardo/linux.git] / drivers / net / ethernet / emulex / benet / be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 static const struct pci_device_id be_dev_ids[] = {
45         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
49         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
50         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
51         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
52         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
53         { 0 }
54 };
55 MODULE_DEVICE_TABLE(pci, be_dev_ids);
56 /* UE Status Low CSR */
57 static const char * const ue_status_low_desc[] = {
58         "CEV",
59         "CTX",
60         "DBUF",
61         "ERX",
62         "Host",
63         "MPU",
64         "NDMA",
65         "PTC ",
66         "RDMA ",
67         "RXF ",
68         "RXIPS ",
69         "RXULP0 ",
70         "RXULP1 ",
71         "RXULP2 ",
72         "TIM ",
73         "TPOST ",
74         "TPRE ",
75         "TXIPS ",
76         "TXULP0 ",
77         "TXULP1 ",
78         "UC ",
79         "WDMA ",
80         "TXULP2 ",
81         "HOST1 ",
82         "P0_OB_LINK ",
83         "P1_OB_LINK ",
84         "HOST_GPIO ",
85         "MBOX ",
86         "ERX2 ",
87         "SPARE ",
88         "JTAG ",
89         "MPU_INTPEND "
90 };
91
92 /* UE Status High CSR */
93 static const char * const ue_status_hi_desc[] = {
94         "LPCMEMHOST",
95         "MGMT_MAC",
96         "PCS0ONLINE",
97         "MPU_IRAM",
98         "PCS1ONLINE",
99         "PCTL0",
100         "PCTL1",
101         "PMEM",
102         "RR",
103         "TXPB",
104         "RXPP",
105         "XAUI",
106         "TXP",
107         "ARM",
108         "IPC",
109         "HOST2",
110         "HOST3",
111         "HOST4",
112         "HOST5",
113         "HOST6",
114         "HOST7",
115         "ECRC",
116         "Poison TLP",
117         "NETC",
118         "PERIPH",
119         "LLTXULP",
120         "D2P",
121         "RCON",
122         "LDMA",
123         "LLTXP",
124         "LLTXPB",
125         "Unknown"
126 };
127
128 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
129                                  BE_IF_FLAGS_BROADCAST | \
130                                  BE_IF_FLAGS_MULTICAST | \
131                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
132
133 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
134 {
135         struct be_dma_mem *mem = &q->dma_mem;
136
137         if (mem->va) {
138                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
139                                   mem->dma);
140                 mem->va = NULL;
141         }
142 }
143
144 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
145                           u16 len, u16 entry_size)
146 {
147         struct be_dma_mem *mem = &q->dma_mem;
148
149         memset(q, 0, sizeof(*q));
150         q->len = len;
151         q->entry_size = entry_size;
152         mem->size = len * entry_size;
153         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
154                                       GFP_KERNEL);
155         if (!mem->va)
156                 return -ENOMEM;
157         return 0;
158 }
159
160 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
161 {
162         u32 reg, enabled;
163
164         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
165                               &reg);
166         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
167
168         if (!enabled && enable)
169                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
170         else if (enabled && !enable)
171                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
172         else
173                 return;
174
175         pci_write_config_dword(adapter->pdev,
176                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
177 }
178
179 static void be_intr_set(struct be_adapter *adapter, bool enable)
180 {
181         int status = 0;
182
183         /* On lancer interrupts can't be controlled via this register */
184         if (lancer_chip(adapter))
185                 return;
186
187         if (be_check_error(adapter, BE_ERROR_EEH))
188                 return;
189
190         status = be_cmd_intr_set(adapter, enable);
191         if (status)
192                 be_reg_intr_set(adapter, enable);
193 }
194
195 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
196 {
197         u32 val = 0;
198
199         if (be_check_error(adapter, BE_ERROR_HW))
200                 return;
201
202         val |= qid & DB_RQ_RING_ID_MASK;
203         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
204
205         wmb();
206         iowrite32(val, adapter->db + DB_RQ_OFFSET);
207 }
208
209 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
210                           u16 posted)
211 {
212         u32 val = 0;
213
214         if (be_check_error(adapter, BE_ERROR_HW))
215                 return;
216
217         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
218         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
219
220         wmb();
221         iowrite32(val, adapter->db + txo->db_offset);
222 }
223
224 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
225                          bool arm, bool clear_int, u16 num_popped,
226                          u32 eq_delay_mult_enc)
227 {
228         u32 val = 0;
229
230         val |= qid & DB_EQ_RING_ID_MASK;
231         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
232
233         if (be_check_error(adapter, BE_ERROR_HW))
234                 return;
235
236         if (arm)
237                 val |= 1 << DB_EQ_REARM_SHIFT;
238         if (clear_int)
239                 val |= 1 << DB_EQ_CLR_SHIFT;
240         val |= 1 << DB_EQ_EVNT_SHIFT;
241         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
242         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
243         iowrite32(val, adapter->db + DB_EQ_OFFSET);
244 }
245
246 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
247 {
248         u32 val = 0;
249
250         val |= qid & DB_CQ_RING_ID_MASK;
251         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
252                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
253
254         if (be_check_error(adapter, BE_ERROR_HW))
255                 return;
256
257         if (arm)
258                 val |= 1 << DB_CQ_REARM_SHIFT;
259         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
260         iowrite32(val, adapter->db + DB_CQ_OFFSET);
261 }
262
263 static int be_mac_addr_set(struct net_device *netdev, void *p)
264 {
265         struct be_adapter *adapter = netdev_priv(netdev);
266         struct device *dev = &adapter->pdev->dev;
267         struct sockaddr *addr = p;
268         int status;
269         u8 mac[ETH_ALEN];
270         u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
271
272         if (!is_valid_ether_addr(addr->sa_data))
273                 return -EADDRNOTAVAIL;
274
275         /* Proceed further only if, User provided MAC is different
276          * from active MAC
277          */
278         if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
279                 return 0;
280
281         /* if device is not running, copy MAC to netdev->dev_addr */
282         if (!netif_running(netdev))
283                 goto done;
284
285         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
286          * privilege or if PF did not provision the new MAC address.
287          * On BE3, this cmd will always fail if the VF doesn't have the
288          * FILTMGMT privilege. This failure is OK, only if the PF programmed
289          * the MAC for the VF.
290          */
291         status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
292                                  adapter->if_handle, &adapter->pmac_id[0], 0);
293         if (!status) {
294                 curr_pmac_id = adapter->pmac_id[0];
295
296                 /* Delete the old programmed MAC. This call may fail if the
297                  * old MAC was already deleted by the PF driver.
298                  */
299                 if (adapter->pmac_id[0] != old_pmac_id)
300                         be_cmd_pmac_del(adapter, adapter->if_handle,
301                                         old_pmac_id, 0);
302         }
303
304         /* Decide if the new MAC is successfully activated only after
305          * querying the FW
306          */
307         status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
308                                        adapter->if_handle, true, 0);
309         if (status)
310                 goto err;
311
312         /* The MAC change did not happen, either due to lack of privilege
313          * or PF didn't pre-provision.
314          */
315         if (!ether_addr_equal(addr->sa_data, mac)) {
316                 status = -EPERM;
317                 goto err;
318         }
319 done:
320         ether_addr_copy(netdev->dev_addr, addr->sa_data);
321         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
322         return 0;
323 err:
324         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
325         return status;
326 }
327
328 /* BE2 supports only v0 cmd */
329 static void *hw_stats_from_cmd(struct be_adapter *adapter)
330 {
331         if (BE2_chip(adapter)) {
332                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
333
334                 return &cmd->hw_stats;
335         } else if (BE3_chip(adapter)) {
336                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
337
338                 return &cmd->hw_stats;
339         } else {
340                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
341
342                 return &cmd->hw_stats;
343         }
344 }
345
346 /* BE2 supports only v0 cmd */
347 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
348 {
349         if (BE2_chip(adapter)) {
350                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
351
352                 return &hw_stats->erx;
353         } else if (BE3_chip(adapter)) {
354                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
355
356                 return &hw_stats->erx;
357         } else {
358                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
359
360                 return &hw_stats->erx;
361         }
362 }
363
364 static void populate_be_v0_stats(struct be_adapter *adapter)
365 {
366         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
367         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
368         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
369         struct be_port_rxf_stats_v0 *port_stats =
370                                         &rxf_stats->port[adapter->port_num];
371         struct be_drv_stats *drvs = &adapter->drv_stats;
372
373         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
374         drvs->rx_pause_frames = port_stats->rx_pause_frames;
375         drvs->rx_crc_errors = port_stats->rx_crc_errors;
376         drvs->rx_control_frames = port_stats->rx_control_frames;
377         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
378         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
379         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
380         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
381         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
382         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
383         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
384         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
385         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
386         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
387         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
388         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
389         drvs->rx_dropped_header_too_small =
390                 port_stats->rx_dropped_header_too_small;
391         drvs->rx_address_filtered =
392                                         port_stats->rx_address_filtered +
393                                         port_stats->rx_vlan_filtered;
394         drvs->rx_alignment_symbol_errors =
395                 port_stats->rx_alignment_symbol_errors;
396
397         drvs->tx_pauseframes = port_stats->tx_pauseframes;
398         drvs->tx_controlframes = port_stats->tx_controlframes;
399
400         if (adapter->port_num)
401                 drvs->jabber_events = rxf_stats->port1_jabber_events;
402         else
403                 drvs->jabber_events = rxf_stats->port0_jabber_events;
404         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
405         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
406         drvs->forwarded_packets = rxf_stats->forwarded_packets;
407         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
408         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
409         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
410         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
411 }
412
413 static void populate_be_v1_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v1 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
424         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
425         drvs->rx_pause_frames = port_stats->rx_pause_frames;
426         drvs->rx_crc_errors = port_stats->rx_crc_errors;
427         drvs->rx_control_frames = port_stats->rx_control_frames;
428         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
429         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
430         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
431         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
432         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
433         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
434         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
435         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
436         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
437         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_input_fifo_overflow_drop =
441                 port_stats->rx_input_fifo_overflow_drop;
442         drvs->rx_address_filtered = port_stats->rx_address_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
449         drvs->jabber_events = port_stats->jabber_events;
450         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
451         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
452         drvs->forwarded_packets = rxf_stats->forwarded_packets;
453         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
454         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
455         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
456         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
457 }
458
459 static void populate_be_v2_stats(struct be_adapter *adapter)
460 {
461         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
462         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
463         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
464         struct be_port_rxf_stats_v2 *port_stats =
465                                         &rxf_stats->port[adapter->port_num];
466         struct be_drv_stats *drvs = &adapter->drv_stats;
467
468         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
469         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
470         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
471         drvs->rx_pause_frames = port_stats->rx_pause_frames;
472         drvs->rx_crc_errors = port_stats->rx_crc_errors;
473         drvs->rx_control_frames = port_stats->rx_control_frames;
474         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
475         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
476         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
477         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
478         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
479         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
480         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
481         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
482         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
483         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
484         drvs->rx_dropped_header_too_small =
485                 port_stats->rx_dropped_header_too_small;
486         drvs->rx_input_fifo_overflow_drop =
487                 port_stats->rx_input_fifo_overflow_drop;
488         drvs->rx_address_filtered = port_stats->rx_address_filtered;
489         drvs->rx_alignment_symbol_errors =
490                 port_stats->rx_alignment_symbol_errors;
491         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
492         drvs->tx_pauseframes = port_stats->tx_pauseframes;
493         drvs->tx_controlframes = port_stats->tx_controlframes;
494         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
495         drvs->jabber_events = port_stats->jabber_events;
496         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
497         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
498         drvs->forwarded_packets = rxf_stats->forwarded_packets;
499         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
500         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
501         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
502         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
503         if (be_roce_supported(adapter)) {
504                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
505                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
506                 drvs->rx_roce_frames = port_stats->roce_frames_received;
507                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
508                 drvs->roce_drops_payload_len =
509                         port_stats->roce_drops_payload_len;
510         }
511 }
512
513 static void populate_lancer_stats(struct be_adapter *adapter)
514 {
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
517
518         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
519         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
520         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
521         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
522         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
523         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
524         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
525         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
526         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
527         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
528         drvs->rx_dropped_tcp_length =
529                                 pport_stats->rx_dropped_invalid_tcp_length;
530         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
533         drvs->rx_dropped_header_too_small =
534                                 pport_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
536         drvs->rx_address_filtered =
537                                         pport_stats->rx_address_filtered +
538                                         pport_stats->rx_vlan_filtered;
539         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
540         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
541         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
542         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
543         drvs->jabber_events = pport_stats->rx_jabbers;
544         drvs->forwarded_packets = pport_stats->num_forwards_lo;
545         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
546         drvs->rx_drops_too_many_frags =
547                                 pport_stats->rx_drops_too_many_frags_lo;
548 }
549
550 static void accumulate_16bit_val(u32 *acc, u16 val)
551 {
552 #define lo(x)                   (x & 0xFFFF)
553 #define hi(x)                   (x & 0xFFFF0000)
554         bool wrapped = val < lo(*acc);
555         u32 newacc = hi(*acc) + val;
556
557         if (wrapped)
558                 newacc += 65536;
559         ACCESS_ONCE(*acc) = newacc;
560 }
561
562 static void populate_erx_stats(struct be_adapter *adapter,
563                                struct be_rx_obj *rxo, u32 erx_stat)
564 {
565         if (!BEx_chip(adapter))
566                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
567         else
568                 /* below erx HW counter can actually wrap around after
569                  * 65535. Driver accumulates a 32-bit value
570                  */
571                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
572                                      (u16)erx_stat);
573 }
574
575 void be_parse_stats(struct be_adapter *adapter)
576 {
577         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
578         struct be_rx_obj *rxo;
579         int i;
580         u32 erx_stat;
581
582         if (lancer_chip(adapter)) {
583                 populate_lancer_stats(adapter);
584         } else {
585                 if (BE2_chip(adapter))
586                         populate_be_v0_stats(adapter);
587                 else if (BE3_chip(adapter))
588                         /* for BE3 */
589                         populate_be_v1_stats(adapter);
590                 else
591                         populate_be_v2_stats(adapter);
592
593                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
594                 for_all_rx_queues(adapter, rxo, i) {
595                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
596                         populate_erx_stats(adapter, rxo, erx_stat);
597                 }
598         }
599 }
600
601 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
602                                                 struct rtnl_link_stats64 *stats)
603 {
604         struct be_adapter *adapter = netdev_priv(netdev);
605         struct be_drv_stats *drvs = &adapter->drv_stats;
606         struct be_rx_obj *rxo;
607         struct be_tx_obj *txo;
608         u64 pkts, bytes;
609         unsigned int start;
610         int i;
611
612         for_all_rx_queues(adapter, rxo, i) {
613                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
614
615                 do {
616                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
617                         pkts = rx_stats(rxo)->rx_pkts;
618                         bytes = rx_stats(rxo)->rx_bytes;
619                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
620                 stats->rx_packets += pkts;
621                 stats->rx_bytes += bytes;
622                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
623                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
624                                         rx_stats(rxo)->rx_drops_no_frags;
625         }
626
627         for_all_tx_queues(adapter, txo, i) {
628                 const struct be_tx_stats *tx_stats = tx_stats(txo);
629
630                 do {
631                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
632                         pkts = tx_stats(txo)->tx_pkts;
633                         bytes = tx_stats(txo)->tx_bytes;
634                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
635                 stats->tx_packets += pkts;
636                 stats->tx_bytes += bytes;
637         }
638
639         /* bad pkts received */
640         stats->rx_errors = drvs->rx_crc_errors +
641                 drvs->rx_alignment_symbol_errors +
642                 drvs->rx_in_range_errors +
643                 drvs->rx_out_range_errors +
644                 drvs->rx_frame_too_long +
645                 drvs->rx_dropped_too_small +
646                 drvs->rx_dropped_too_short +
647                 drvs->rx_dropped_header_too_small +
648                 drvs->rx_dropped_tcp_length +
649                 drvs->rx_dropped_runt;
650
651         /* detailed rx errors */
652         stats->rx_length_errors = drvs->rx_in_range_errors +
653                 drvs->rx_out_range_errors +
654                 drvs->rx_frame_too_long;
655
656         stats->rx_crc_errors = drvs->rx_crc_errors;
657
658         /* frame alignment errors */
659         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
660
661         /* receiver fifo overrun */
662         /* drops_no_pbuf is no per i/f, it's per BE card */
663         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
664                                 drvs->rx_input_fifo_overflow_drop +
665                                 drvs->rx_drops_no_pbuf;
666         return stats;
667 }
668
669 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
670 {
671         struct net_device *netdev = adapter->netdev;
672
673         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
674                 netif_carrier_off(netdev);
675                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
676         }
677
678         if (link_status)
679                 netif_carrier_on(netdev);
680         else
681                 netif_carrier_off(netdev);
682
683         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
684 }
685
686 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
687 {
688         struct be_tx_stats *stats = tx_stats(txo);
689         u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
690
691         u64_stats_update_begin(&stats->sync);
692         stats->tx_reqs++;
693         stats->tx_bytes += skb->len;
694         stats->tx_pkts += tx_pkts;
695         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
696                 stats->tx_vxlan_offload_pkts += tx_pkts;
697         u64_stats_update_end(&stats->sync);
698 }
699
700 /* Returns number of WRBs needed for the skb */
701 static u32 skb_wrb_cnt(struct sk_buff *skb)
702 {
703         /* +1 for the header wrb */
704         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
705 }
706
707 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
708 {
709         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
710         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
711         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
712         wrb->rsvd0 = 0;
713 }
714
715 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
716  * to avoid the swap and shift/mask operations in wrb_fill().
717  */
718 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
719 {
720         wrb->frag_pa_hi = 0;
721         wrb->frag_pa_lo = 0;
722         wrb->frag_len = 0;
723         wrb->rsvd0 = 0;
724 }
725
726 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
727                                      struct sk_buff *skb)
728 {
729         u8 vlan_prio;
730         u16 vlan_tag;
731
732         vlan_tag = skb_vlan_tag_get(skb);
733         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
734         /* If vlan priority provided by OS is NOT in available bmap */
735         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
736                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
737                                 adapter->recommended_prio_bits;
738
739         return vlan_tag;
740 }
741
742 /* Used only for IP tunnel packets */
743 static u16 skb_inner_ip_proto(struct sk_buff *skb)
744 {
745         return (inner_ip_hdr(skb)->version == 4) ?
746                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
747 }
748
749 static u16 skb_ip_proto(struct sk_buff *skb)
750 {
751         return (ip_hdr(skb)->version == 4) ?
752                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
753 }
754
755 static inline bool be_is_txq_full(struct be_tx_obj *txo)
756 {
757         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
758 }
759
760 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
761 {
762         return atomic_read(&txo->q.used) < txo->q.len / 2;
763 }
764
765 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
766 {
767         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
768 }
769
770 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
771                                        struct sk_buff *skb,
772                                        struct be_wrb_params *wrb_params)
773 {
774         u16 proto;
775
776         if (skb_is_gso(skb)) {
777                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
778                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
779                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
780                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
781         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
782                 if (skb->encapsulation) {
783                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
784                         proto = skb_inner_ip_proto(skb);
785                 } else {
786                         proto = skb_ip_proto(skb);
787                 }
788                 if (proto == IPPROTO_TCP)
789                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
790                 else if (proto == IPPROTO_UDP)
791                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
792         }
793
794         if (skb_vlan_tag_present(skb)) {
795                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
796                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
797         }
798
799         BE_WRB_F_SET(wrb_params->features, CRC, 1);
800 }
801
802 static void wrb_fill_hdr(struct be_adapter *adapter,
803                          struct be_eth_hdr_wrb *hdr,
804                          struct be_wrb_params *wrb_params,
805                          struct sk_buff *skb)
806 {
807         memset(hdr, 0, sizeof(*hdr));
808
809         SET_TX_WRB_HDR_BITS(crc, hdr,
810                             BE_WRB_F_GET(wrb_params->features, CRC));
811         SET_TX_WRB_HDR_BITS(ipcs, hdr,
812                             BE_WRB_F_GET(wrb_params->features, IPCS));
813         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
814                             BE_WRB_F_GET(wrb_params->features, TCPCS));
815         SET_TX_WRB_HDR_BITS(udpcs, hdr,
816                             BE_WRB_F_GET(wrb_params->features, UDPCS));
817
818         SET_TX_WRB_HDR_BITS(lso, hdr,
819                             BE_WRB_F_GET(wrb_params->features, LSO));
820         SET_TX_WRB_HDR_BITS(lso6, hdr,
821                             BE_WRB_F_GET(wrb_params->features, LSO6));
822         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
823
824         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
825          * hack is not needed, the evt bit is set while ringing DB.
826          */
827         SET_TX_WRB_HDR_BITS(event, hdr,
828                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
829         SET_TX_WRB_HDR_BITS(vlan, hdr,
830                             BE_WRB_F_GET(wrb_params->features, VLAN));
831         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
832
833         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
834         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
835         SET_TX_WRB_HDR_BITS(mgmt, hdr,
836                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
837 }
838
839 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
840                           bool unmap_single)
841 {
842         dma_addr_t dma;
843         u32 frag_len = le32_to_cpu(wrb->frag_len);
844
845
846         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
847                 (u64)le32_to_cpu(wrb->frag_pa_lo);
848         if (frag_len) {
849                 if (unmap_single)
850                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
851                 else
852                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
853         }
854 }
855
856 /* Grab a WRB header for xmit */
857 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
858 {
859         u32 head = txo->q.head;
860
861         queue_head_inc(&txo->q);
862         return head;
863 }
864
865 /* Set up the WRB header for xmit */
866 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
867                                 struct be_tx_obj *txo,
868                                 struct be_wrb_params *wrb_params,
869                                 struct sk_buff *skb, u16 head)
870 {
871         u32 num_frags = skb_wrb_cnt(skb);
872         struct be_queue_info *txq = &txo->q;
873         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
874
875         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
876         be_dws_cpu_to_le(hdr, sizeof(*hdr));
877
878         BUG_ON(txo->sent_skb_list[head]);
879         txo->sent_skb_list[head] = skb;
880         txo->last_req_hdr = head;
881         atomic_add(num_frags, &txq->used);
882         txo->last_req_wrb_cnt = num_frags;
883         txo->pend_wrb_cnt += num_frags;
884 }
885
886 /* Setup a WRB fragment (buffer descriptor) for xmit */
887 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
888                                  int len)
889 {
890         struct be_eth_wrb *wrb;
891         struct be_queue_info *txq = &txo->q;
892
893         wrb = queue_head_node(txq);
894         wrb_fill(wrb, busaddr, len);
895         queue_head_inc(txq);
896 }
897
898 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
899  * was invoked. The producer index is restored to the previous packet and the
900  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
901  */
902 static void be_xmit_restore(struct be_adapter *adapter,
903                             struct be_tx_obj *txo, u32 head, bool map_single,
904                             u32 copied)
905 {
906         struct device *dev;
907         struct be_eth_wrb *wrb;
908         struct be_queue_info *txq = &txo->q;
909
910         dev = &adapter->pdev->dev;
911         txq->head = head;
912
913         /* skip the first wrb (hdr); it's not mapped */
914         queue_head_inc(txq);
915         while (copied) {
916                 wrb = queue_head_node(txq);
917                 unmap_tx_frag(dev, wrb, map_single);
918                 map_single = false;
919                 copied -= le32_to_cpu(wrb->frag_len);
920                 queue_head_inc(txq);
921         }
922
923         txq->head = head;
924 }
925
926 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
927  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
928  * of WRBs used up by the packet.
929  */
930 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
931                            struct sk_buff *skb,
932                            struct be_wrb_params *wrb_params)
933 {
934         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
935         struct device *dev = &adapter->pdev->dev;
936         struct be_queue_info *txq = &txo->q;
937         bool map_single = false;
938         u32 head = txq->head;
939         dma_addr_t busaddr;
940         int len;
941
942         head = be_tx_get_wrb_hdr(txo);
943
944         if (skb->len > skb->data_len) {
945                 len = skb_headlen(skb);
946
947                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
948                 if (dma_mapping_error(dev, busaddr))
949                         goto dma_err;
950                 map_single = true;
951                 be_tx_setup_wrb_frag(txo, busaddr, len);
952                 copied += len;
953         }
954
955         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
956                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
957                 len = skb_frag_size(frag);
958
959                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
960                 if (dma_mapping_error(dev, busaddr))
961                         goto dma_err;
962                 be_tx_setup_wrb_frag(txo, busaddr, len);
963                 copied += len;
964         }
965
966         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
967
968         be_tx_stats_update(txo, skb);
969         return wrb_cnt;
970
971 dma_err:
972         adapter->drv_stats.dma_map_errors++;
973         be_xmit_restore(adapter, txo, head, map_single, copied);
974         return 0;
975 }
976
977 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
978 {
979         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
980 }
981
982 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
983                                              struct sk_buff *skb,
984                                              struct be_wrb_params
985                                              *wrb_params)
986 {
987         u16 vlan_tag = 0;
988
989         skb = skb_share_check(skb, GFP_ATOMIC);
990         if (unlikely(!skb))
991                 return skb;
992
993         if (skb_vlan_tag_present(skb))
994                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
995
996         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
997                 if (!vlan_tag)
998                         vlan_tag = adapter->pvid;
999                 /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to
1000                  * skip VLAN insertion
1001                  */
1002                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1003         }
1004
1005         if (vlan_tag) {
1006                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1007                                                 vlan_tag);
1008                 if (unlikely(!skb))
1009                         return skb;
1010                 skb->vlan_tci = 0;
1011         }
1012
1013         /* Insert the outer VLAN, if any */
1014         if (adapter->qnq_vid) {
1015                 vlan_tag = adapter->qnq_vid;
1016                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1017                                                 vlan_tag);
1018                 if (unlikely(!skb))
1019                         return skb;
1020                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1021         }
1022
1023         return skb;
1024 }
1025
1026 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1027 {
1028         struct ethhdr *eh = (struct ethhdr *)skb->data;
1029         u16 offset = ETH_HLEN;
1030
1031         if (eh->h_proto == htons(ETH_P_IPV6)) {
1032                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1033
1034                 offset += sizeof(struct ipv6hdr);
1035                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1036                     ip6h->nexthdr != NEXTHDR_UDP) {
1037                         struct ipv6_opt_hdr *ehdr =
1038                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1039
1040                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1041                         if (ehdr->hdrlen == 0xff)
1042                                 return true;
1043                 }
1044         }
1045         return false;
1046 }
1047
1048 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1049 {
1050         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1051 }
1052
1053 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1054 {
1055         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1056 }
1057
1058 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1059                                                   struct sk_buff *skb,
1060                                                   struct be_wrb_params
1061                                                   *wrb_params)
1062 {
1063         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1064         unsigned int eth_hdr_len;
1065         struct iphdr *ip;
1066
1067         /* For padded packets, BE HW modifies tot_len field in IP header
1068          * incorrecly when VLAN tag is inserted by HW.
1069          * For padded packets, Lancer computes incorrect checksum.
1070          */
1071         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1072                                                 VLAN_ETH_HLEN : ETH_HLEN;
1073         if (skb->len <= 60 &&
1074             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1075             is_ipv4_pkt(skb)) {
1076                 ip = (struct iphdr *)ip_hdr(skb);
1077                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1078         }
1079
1080         /* If vlan tag is already inlined in the packet, skip HW VLAN
1081          * tagging in pvid-tagging mode
1082          */
1083         if (be_pvid_tagging_enabled(adapter) &&
1084             veh->h_vlan_proto == htons(ETH_P_8021Q))
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086
1087         /* HW has a bug wherein it will calculate CSUM for VLAN
1088          * pkts even though it is disabled.
1089          * Manually insert VLAN in pkt.
1090          */
1091         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1092             skb_vlan_tag_present(skb)) {
1093                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1094                 if (unlikely(!skb))
1095                         goto err;
1096         }
1097
1098         /* HW may lockup when VLAN HW tagging is requested on
1099          * certain ipv6 packets. Drop such pkts if the HW workaround to
1100          * skip HW tagging is not enabled by FW.
1101          */
1102         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1103                      (adapter->pvid || adapter->qnq_vid) &&
1104                      !qnq_async_evt_rcvd(adapter)))
1105                 goto tx_drop;
1106
1107         /* Manual VLAN tag insertion to prevent:
1108          * ASIC lockup when the ASIC inserts VLAN tag into
1109          * certain ipv6 packets. Insert VLAN tags in driver,
1110          * and set event, completion, vlan bits accordingly
1111          * in the Tx WRB.
1112          */
1113         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1114             be_vlan_tag_tx_chk(adapter, skb)) {
1115                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1116                 if (unlikely(!skb))
1117                         goto err;
1118         }
1119
1120         return skb;
1121 tx_drop:
1122         dev_kfree_skb_any(skb);
1123 err:
1124         return NULL;
1125 }
1126
1127 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1128                                            struct sk_buff *skb,
1129                                            struct be_wrb_params *wrb_params)
1130 {
1131         int err;
1132
1133         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1134          * packets that are 32b or less may cause a transmit stall
1135          * on that port. The workaround is to pad such packets
1136          * (len <= 32 bytes) to a minimum length of 36b.
1137          */
1138         if (skb->len <= 32) {
1139                 if (skb_put_padto(skb, 36))
1140                         return NULL;
1141         }
1142
1143         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1144                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1145                 if (!skb)
1146                         return NULL;
1147         }
1148
1149         /* The stack can send us skbs with length greater than
1150          * what the HW can handle. Trim the extra bytes.
1151          */
1152         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1153         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1154         WARN_ON(err);
1155
1156         return skb;
1157 }
1158
1159 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1160 {
1161         struct be_queue_info *txq = &txo->q;
1162         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1163
1164         /* Mark the last request eventable if it hasn't been marked already */
1165         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1166                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1167
1168         /* compose a dummy wrb if there are odd set of wrbs to notify */
1169         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1170                 wrb_fill_dummy(queue_head_node(txq));
1171                 queue_head_inc(txq);
1172                 atomic_inc(&txq->used);
1173                 txo->pend_wrb_cnt++;
1174                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1175                                            TX_HDR_WRB_NUM_SHIFT);
1176                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1177                                           TX_HDR_WRB_NUM_SHIFT);
1178         }
1179         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1180         txo->pend_wrb_cnt = 0;
1181 }
1182
1183 /* OS2BMC related */
1184
1185 #define DHCP_CLIENT_PORT        68
1186 #define DHCP_SERVER_PORT        67
1187 #define NET_BIOS_PORT1          137
1188 #define NET_BIOS_PORT2          138
1189 #define DHCPV6_RAS_PORT         547
1190
1191 #define is_mc_allowed_on_bmc(adapter, eh)       \
1192         (!is_multicast_filt_enabled(adapter) && \
1193          is_multicast_ether_addr(eh->h_dest) && \
1194          !is_broadcast_ether_addr(eh->h_dest))
1195
1196 #define is_bc_allowed_on_bmc(adapter, eh)       \
1197         (!is_broadcast_filt_enabled(adapter) && \
1198          is_broadcast_ether_addr(eh->h_dest))
1199
1200 #define is_arp_allowed_on_bmc(adapter, skb)     \
1201         (is_arp(skb) && is_arp_filt_enabled(adapter))
1202
1203 #define is_broadcast_packet(eh, adapter)        \
1204                 (is_multicast_ether_addr(eh->h_dest) && \
1205                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1206
1207 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1208
1209 #define is_arp_filt_enabled(adapter)    \
1210                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1211
1212 #define is_dhcp_client_filt_enabled(adapter)    \
1213                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1214
1215 #define is_dhcp_srvr_filt_enabled(adapter)      \
1216                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1217
1218 #define is_nbios_filt_enabled(adapter)  \
1219                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1220
1221 #define is_ipv6_na_filt_enabled(adapter)        \
1222                 (adapter->bmc_filt_mask &       \
1223                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1224
1225 #define is_ipv6_ra_filt_enabled(adapter)        \
1226                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1227
1228 #define is_ipv6_ras_filt_enabled(adapter)       \
1229                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1230
1231 #define is_broadcast_filt_enabled(adapter)      \
1232                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1233
1234 #define is_multicast_filt_enabled(adapter)      \
1235                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1236
1237 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1238                                struct sk_buff **skb)
1239 {
1240         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1241         bool os2bmc = false;
1242
1243         if (!be_is_os2bmc_enabled(adapter))
1244                 goto done;
1245
1246         if (!is_multicast_ether_addr(eh->h_dest))
1247                 goto done;
1248
1249         if (is_mc_allowed_on_bmc(adapter, eh) ||
1250             is_bc_allowed_on_bmc(adapter, eh) ||
1251             is_arp_allowed_on_bmc(adapter, (*skb))) {
1252                 os2bmc = true;
1253                 goto done;
1254         }
1255
1256         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1257                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1258                 u8 nexthdr = hdr->nexthdr;
1259
1260                 if (nexthdr == IPPROTO_ICMPV6) {
1261                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1262
1263                         switch (icmp6->icmp6_type) {
1264                         case NDISC_ROUTER_ADVERTISEMENT:
1265                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1266                                 goto done;
1267                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1268                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1269                                 goto done;
1270                         default:
1271                                 break;
1272                         }
1273                 }
1274         }
1275
1276         if (is_udp_pkt((*skb))) {
1277                 struct udphdr *udp = udp_hdr((*skb));
1278
1279                 switch (ntohs(udp->dest)) {
1280                 case DHCP_CLIENT_PORT:
1281                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1282                         goto done;
1283                 case DHCP_SERVER_PORT:
1284                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1285                         goto done;
1286                 case NET_BIOS_PORT1:
1287                 case NET_BIOS_PORT2:
1288                         os2bmc = is_nbios_filt_enabled(adapter);
1289                         goto done;
1290                 case DHCPV6_RAS_PORT:
1291                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1292                         goto done;
1293                 default:
1294                         break;
1295                 }
1296         }
1297 done:
1298         /* For packets over a vlan, which are destined
1299          * to BMC, asic expects the vlan to be inline in the packet.
1300          */
1301         if (os2bmc)
1302                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1303
1304         return os2bmc;
1305 }
1306
1307 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1308 {
1309         struct be_adapter *adapter = netdev_priv(netdev);
1310         u16 q_idx = skb_get_queue_mapping(skb);
1311         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1312         struct be_wrb_params wrb_params = { 0 };
1313         bool flush = !skb->xmit_more;
1314         u16 wrb_cnt;
1315
1316         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1317         if (unlikely(!skb))
1318                 goto drop;
1319
1320         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1321
1322         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1323         if (unlikely(!wrb_cnt)) {
1324                 dev_kfree_skb_any(skb);
1325                 goto drop;
1326         }
1327
1328         /* if os2bmc is enabled and if the pkt is destined to bmc,
1329          * enqueue the pkt a 2nd time with mgmt bit set.
1330          */
1331         if (be_send_pkt_to_bmc(adapter, &skb)) {
1332                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1333                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1334                 if (unlikely(!wrb_cnt))
1335                         goto drop;
1336                 else
1337                         skb_get(skb);
1338         }
1339
1340         if (be_is_txq_full(txo)) {
1341                 netif_stop_subqueue(netdev, q_idx);
1342                 tx_stats(txo)->tx_stops++;
1343         }
1344
1345         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1346                 be_xmit_flush(adapter, txo);
1347
1348         return NETDEV_TX_OK;
1349 drop:
1350         tx_stats(txo)->tx_drv_drops++;
1351         /* Flush the already enqueued tx requests */
1352         if (flush && txo->pend_wrb_cnt)
1353                 be_xmit_flush(adapter, txo);
1354
1355         return NETDEV_TX_OK;
1356 }
1357
1358 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1359 {
1360         struct be_adapter *adapter = netdev_priv(netdev);
1361         struct device *dev = &adapter->pdev->dev;
1362
1363         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1364                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1365                          BE_MIN_MTU, BE_MAX_MTU);
1366                 return -EINVAL;
1367         }
1368
1369         dev_info(dev, "MTU changed from %d to %d bytes\n",
1370                  netdev->mtu, new_mtu);
1371         netdev->mtu = new_mtu;
1372         return 0;
1373 }
1374
1375 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1376 {
1377         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1378                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1379 }
1380
1381 static int be_set_vlan_promisc(struct be_adapter *adapter)
1382 {
1383         struct device *dev = &adapter->pdev->dev;
1384         int status;
1385
1386         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1387                 return 0;
1388
1389         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1390         if (!status) {
1391                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1392                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1393         } else {
1394                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1395         }
1396         return status;
1397 }
1398
1399 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1400 {
1401         struct device *dev = &adapter->pdev->dev;
1402         int status;
1403
1404         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1405         if (!status) {
1406                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1407                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1408         }
1409         return status;
1410 }
1411
1412 /*
1413  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1414  * If the user configures more, place BE in vlan promiscuous mode.
1415  */
1416 static int be_vid_config(struct be_adapter *adapter)
1417 {
1418         struct device *dev = &adapter->pdev->dev;
1419         u16 vids[BE_NUM_VLANS_SUPPORTED];
1420         u16 num = 0, i = 0;
1421         int status = 0;
1422
1423         /* No need to further configure vids if in promiscuous mode */
1424         if (be_in_all_promisc(adapter))
1425                 return 0;
1426
1427         if (adapter->vlans_added > be_max_vlans(adapter))
1428                 return be_set_vlan_promisc(adapter);
1429
1430         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1431                 status = be_clear_vlan_promisc(adapter);
1432                 if (status)
1433                         return status;
1434         }
1435         /* Construct VLAN Table to give to HW */
1436         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1437                 vids[num++] = cpu_to_le16(i);
1438
1439         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1440         if (status) {
1441                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1442                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1443                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1444                     addl_status(status) ==
1445                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1446                         return be_set_vlan_promisc(adapter);
1447         }
1448         return status;
1449 }
1450
1451 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1452 {
1453         struct be_adapter *adapter = netdev_priv(netdev);
1454         int status = 0;
1455
1456         /* Packets with VID 0 are always received by Lancer by default */
1457         if (lancer_chip(adapter) && vid == 0)
1458                 return status;
1459
1460         if (test_bit(vid, adapter->vids))
1461                 return status;
1462
1463         set_bit(vid, adapter->vids);
1464         adapter->vlans_added++;
1465
1466         return be_vid_config(adapter);
1467 }
1468
1469 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1470 {
1471         struct be_adapter *adapter = netdev_priv(netdev);
1472
1473         /* Packets with VID 0 are always received by Lancer by default */
1474         if (lancer_chip(adapter) && vid == 0)
1475                 return 0;
1476
1477         if (!test_bit(vid, adapter->vids))
1478                 return 0;
1479
1480         clear_bit(vid, adapter->vids);
1481         adapter->vlans_added--;
1482
1483         return be_vid_config(adapter);
1484 }
1485
1486 static void be_clear_all_promisc(struct be_adapter *adapter)
1487 {
1488         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF);
1489         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
1490 }
1491
1492 static void be_set_all_promisc(struct be_adapter *adapter)
1493 {
1494         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1495         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1496 }
1497
1498 static void be_set_mc_promisc(struct be_adapter *adapter)
1499 {
1500         int status;
1501
1502         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1503                 return;
1504
1505         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1506         if (!status)
1507                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1508 }
1509
1510 static void be_set_mc_list(struct be_adapter *adapter)
1511 {
1512         int status;
1513
1514         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1515         if (!status)
1516                 adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1517         else
1518                 be_set_mc_promisc(adapter);
1519 }
1520
1521 static void be_set_uc_list(struct be_adapter *adapter)
1522 {
1523         struct netdev_hw_addr *ha;
1524         int i = 1; /* First slot is claimed by the Primary MAC */
1525
1526         for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
1527                 be_cmd_pmac_del(adapter, adapter->if_handle,
1528                                 adapter->pmac_id[i], 0);
1529
1530         if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) {
1531                 be_set_all_promisc(adapter);
1532                 return;
1533         }
1534
1535         netdev_for_each_uc_addr(ha, adapter->netdev) {
1536                 adapter->uc_macs++; /* First slot is for Primary MAC */
1537                 be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle,
1538                                 &adapter->pmac_id[adapter->uc_macs], 0);
1539         }
1540 }
1541
1542 static void be_clear_uc_list(struct be_adapter *adapter)
1543 {
1544         int i;
1545
1546         for (i = 1; i < (adapter->uc_macs + 1); i++)
1547                 be_cmd_pmac_del(adapter, adapter->if_handle,
1548                                 adapter->pmac_id[i], 0);
1549         adapter->uc_macs = 0;
1550 }
1551
1552 static void be_set_rx_mode(struct net_device *netdev)
1553 {
1554         struct be_adapter *adapter = netdev_priv(netdev);
1555
1556         if (netdev->flags & IFF_PROMISC) {
1557                 be_set_all_promisc(adapter);
1558                 return;
1559         }
1560
1561         /* Interface was previously in promiscuous mode; disable it */
1562         if (be_in_all_promisc(adapter)) {
1563                 be_clear_all_promisc(adapter);
1564                 if (adapter->vlans_added)
1565                         be_vid_config(adapter);
1566         }
1567
1568         /* Enable multicast promisc if num configured exceeds what we support */
1569         if (netdev->flags & IFF_ALLMULTI ||
1570             netdev_mc_count(netdev) > be_max_mc(adapter)) {
1571                 be_set_mc_promisc(adapter);
1572                 return;
1573         }
1574
1575         if (netdev_uc_count(netdev) != adapter->uc_macs)
1576                 be_set_uc_list(adapter);
1577
1578         be_set_mc_list(adapter);
1579 }
1580
1581 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1582 {
1583         struct be_adapter *adapter = netdev_priv(netdev);
1584         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1585         int status;
1586
1587         if (!sriov_enabled(adapter))
1588                 return -EPERM;
1589
1590         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1591                 return -EINVAL;
1592
1593         /* Proceed further only if user provided MAC is different
1594          * from active MAC
1595          */
1596         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1597                 return 0;
1598
1599         if (BEx_chip(adapter)) {
1600                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1601                                 vf + 1);
1602
1603                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1604                                          &vf_cfg->pmac_id, vf + 1);
1605         } else {
1606                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1607                                         vf + 1);
1608         }
1609
1610         if (status) {
1611                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1612                         mac, vf, status);
1613                 return be_cmd_status(status);
1614         }
1615
1616         ether_addr_copy(vf_cfg->mac_addr, mac);
1617
1618         return 0;
1619 }
1620
1621 static int be_get_vf_config(struct net_device *netdev, int vf,
1622                             struct ifla_vf_info *vi)
1623 {
1624         struct be_adapter *adapter = netdev_priv(netdev);
1625         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1626
1627         if (!sriov_enabled(adapter))
1628                 return -EPERM;
1629
1630         if (vf >= adapter->num_vfs)
1631                 return -EINVAL;
1632
1633         vi->vf = vf;
1634         vi->max_tx_rate = vf_cfg->tx_rate;
1635         vi->min_tx_rate = 0;
1636         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1637         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1638         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1639         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1640         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1641
1642         return 0;
1643 }
1644
1645 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1646 {
1647         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1648         u16 vids[BE_NUM_VLANS_SUPPORTED];
1649         int vf_if_id = vf_cfg->if_handle;
1650         int status;
1651
1652         /* Enable Transparent VLAN Tagging */
1653         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1654         if (status)
1655                 return status;
1656
1657         /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
1658         vids[0] = 0;
1659         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1660         if (!status)
1661                 dev_info(&adapter->pdev->dev,
1662                          "Cleared guest VLANs on VF%d", vf);
1663
1664         /* After TVT is enabled, disallow VFs to program VLAN filters */
1665         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1666                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1667                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1668                 if (!status)
1669                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1670         }
1671         return 0;
1672 }
1673
1674 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1675 {
1676         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1677         struct device *dev = &adapter->pdev->dev;
1678         int status;
1679
1680         /* Reset Transparent VLAN Tagging. */
1681         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1682                                        vf_cfg->if_handle, 0, 0);
1683         if (status)
1684                 return status;
1685
1686         /* Allow VFs to program VLAN filtering */
1687         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1688                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1689                                                   BE_PRIV_FILTMGMT, vf + 1);
1690                 if (!status) {
1691                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1692                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1693                 }
1694         }
1695
1696         dev_info(dev,
1697                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1698         return 0;
1699 }
1700
1701 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1702 {
1703         struct be_adapter *adapter = netdev_priv(netdev);
1704         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1705         int status;
1706
1707         if (!sriov_enabled(adapter))
1708                 return -EPERM;
1709
1710         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1711                 return -EINVAL;
1712
1713         if (vlan || qos) {
1714                 vlan |= qos << VLAN_PRIO_SHIFT;
1715                 status = be_set_vf_tvt(adapter, vf, vlan);
1716         } else {
1717                 status = be_clear_vf_tvt(adapter, vf);
1718         }
1719
1720         if (status) {
1721                 dev_err(&adapter->pdev->dev,
1722                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1723                         status);
1724                 return be_cmd_status(status);
1725         }
1726
1727         vf_cfg->vlan_tag = vlan;
1728         return 0;
1729 }
1730
1731 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1732                              int min_tx_rate, int max_tx_rate)
1733 {
1734         struct be_adapter *adapter = netdev_priv(netdev);
1735         struct device *dev = &adapter->pdev->dev;
1736         int percent_rate, status = 0;
1737         u16 link_speed = 0;
1738         u8 link_status;
1739
1740         if (!sriov_enabled(adapter))
1741                 return -EPERM;
1742
1743         if (vf >= adapter->num_vfs)
1744                 return -EINVAL;
1745
1746         if (min_tx_rate)
1747                 return -EINVAL;
1748
1749         if (!max_tx_rate)
1750                 goto config_qos;
1751
1752         status = be_cmd_link_status_query(adapter, &link_speed,
1753                                           &link_status, 0);
1754         if (status)
1755                 goto err;
1756
1757         if (!link_status) {
1758                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1759                 status = -ENETDOWN;
1760                 goto err;
1761         }
1762
1763         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1764                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1765                         link_speed);
1766                 status = -EINVAL;
1767                 goto err;
1768         }
1769
1770         /* On Skyhawk the QOS setting must be done only as a % value */
1771         percent_rate = link_speed / 100;
1772         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1773                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1774                         percent_rate);
1775                 status = -EINVAL;
1776                 goto err;
1777         }
1778
1779 config_qos:
1780         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1781         if (status)
1782                 goto err;
1783
1784         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1785         return 0;
1786
1787 err:
1788         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1789                 max_tx_rate, vf);
1790         return be_cmd_status(status);
1791 }
1792
1793 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1794                                 int link_state)
1795 {
1796         struct be_adapter *adapter = netdev_priv(netdev);
1797         int status;
1798
1799         if (!sriov_enabled(adapter))
1800                 return -EPERM;
1801
1802         if (vf >= adapter->num_vfs)
1803                 return -EINVAL;
1804
1805         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1806         if (status) {
1807                 dev_err(&adapter->pdev->dev,
1808                         "Link state change on VF %d failed: %#x\n", vf, status);
1809                 return be_cmd_status(status);
1810         }
1811
1812         adapter->vf_cfg[vf].plink_tracking = link_state;
1813
1814         return 0;
1815 }
1816
1817 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
1818 {
1819         struct be_adapter *adapter = netdev_priv(netdev);
1820         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1821         u8 spoofchk;
1822         int status;
1823
1824         if (!sriov_enabled(adapter))
1825                 return -EPERM;
1826
1827         if (vf >= adapter->num_vfs)
1828                 return -EINVAL;
1829
1830         if (BEx_chip(adapter))
1831                 return -EOPNOTSUPP;
1832
1833         if (enable == vf_cfg->spoofchk)
1834                 return 0;
1835
1836         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
1837
1838         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
1839                                        0, spoofchk);
1840         if (status) {
1841                 dev_err(&adapter->pdev->dev,
1842                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
1843                 return be_cmd_status(status);
1844         }
1845
1846         vf_cfg->spoofchk = enable;
1847         return 0;
1848 }
1849
1850 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
1851                           ulong now)
1852 {
1853         aic->rx_pkts_prev = rx_pkts;
1854         aic->tx_reqs_prev = tx_pkts;
1855         aic->jiffies = now;
1856 }
1857
1858 static int be_get_new_eqd(struct be_eq_obj *eqo)
1859 {
1860         struct be_adapter *adapter = eqo->adapter;
1861         int eqd, start;
1862         struct be_aic_obj *aic;
1863         struct be_rx_obj *rxo;
1864         struct be_tx_obj *txo;
1865         u64 rx_pkts = 0, tx_pkts = 0;
1866         ulong now;
1867         u32 pps, delta;
1868         int i;
1869
1870         aic = &adapter->aic_obj[eqo->idx];
1871         if (!aic->enable) {
1872                 if (aic->jiffies)
1873                         aic->jiffies = 0;
1874                 eqd = aic->et_eqd;
1875                 return eqd;
1876         }
1877
1878         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
1879                 do {
1880                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
1881                         rx_pkts += rxo->stats.rx_pkts;
1882                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
1883         }
1884
1885         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
1886                 do {
1887                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
1888                         tx_pkts += txo->stats.tx_reqs;
1889                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
1890         }
1891
1892         /* Skip, if wrapped around or first calculation */
1893         now = jiffies;
1894         if (!aic->jiffies || time_before(now, aic->jiffies) ||
1895             rx_pkts < aic->rx_pkts_prev ||
1896             tx_pkts < aic->tx_reqs_prev) {
1897                 be_aic_update(aic, rx_pkts, tx_pkts, now);
1898                 return aic->prev_eqd;
1899         }
1900
1901         delta = jiffies_to_msecs(now - aic->jiffies);
1902         if (delta == 0)
1903                 return aic->prev_eqd;
1904
1905         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
1906                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
1907         eqd = (pps / 15000) << 2;
1908
1909         if (eqd < 8)
1910                 eqd = 0;
1911         eqd = min_t(u32, eqd, aic->max_eqd);
1912         eqd = max_t(u32, eqd, aic->min_eqd);
1913
1914         be_aic_update(aic, rx_pkts, tx_pkts, now);
1915
1916         return eqd;
1917 }
1918
1919 /* For Skyhawk-R only */
1920 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
1921 {
1922         struct be_adapter *adapter = eqo->adapter;
1923         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
1924         ulong now = jiffies;
1925         int eqd;
1926         u32 mult_enc;
1927
1928         if (!aic->enable)
1929                 return 0;
1930
1931         if (jiffies_to_msecs(now - aic->jiffies) < 1)
1932                 eqd = aic->prev_eqd;
1933         else
1934                 eqd = be_get_new_eqd(eqo);
1935
1936         if (eqd > 100)
1937                 mult_enc = R2I_DLY_ENC_1;
1938         else if (eqd > 60)
1939                 mult_enc = R2I_DLY_ENC_2;
1940         else if (eqd > 20)
1941                 mult_enc = R2I_DLY_ENC_3;
1942         else
1943                 mult_enc = R2I_DLY_ENC_0;
1944
1945         aic->prev_eqd = eqd;
1946
1947         return mult_enc;
1948 }
1949
1950 void be_eqd_update(struct be_adapter *adapter, bool force_update)
1951 {
1952         struct be_set_eqd set_eqd[MAX_EVT_QS];
1953         struct be_aic_obj *aic;
1954         struct be_eq_obj *eqo;
1955         int i, num = 0, eqd;
1956
1957         for_all_evt_queues(adapter, eqo, i) {
1958                 aic = &adapter->aic_obj[eqo->idx];
1959                 eqd = be_get_new_eqd(eqo);
1960                 if (force_update || eqd != aic->prev_eqd) {
1961                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
1962                         set_eqd[num].eq_id = eqo->q.id;
1963                         aic->prev_eqd = eqd;
1964                         num++;
1965                 }
1966         }
1967
1968         if (num)
1969                 be_cmd_modify_eqd(adapter, set_eqd, num);
1970 }
1971
1972 static void be_rx_stats_update(struct be_rx_obj *rxo,
1973                                struct be_rx_compl_info *rxcp)
1974 {
1975         struct be_rx_stats *stats = rx_stats(rxo);
1976
1977         u64_stats_update_begin(&stats->sync);
1978         stats->rx_compl++;
1979         stats->rx_bytes += rxcp->pkt_size;
1980         stats->rx_pkts++;
1981         if (rxcp->tunneled)
1982                 stats->rx_vxlan_offload_pkts++;
1983         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
1984                 stats->rx_mcast_pkts++;
1985         if (rxcp->err)
1986                 stats->rx_compl_err++;
1987         u64_stats_update_end(&stats->sync);
1988 }
1989
1990 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
1991 {
1992         /* L4 checksum is not reliable for non TCP/UDP packets.
1993          * Also ignore ipcksm for ipv6 pkts
1994          */
1995         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
1996                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
1997 }
1998
1999 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2000 {
2001         struct be_adapter *adapter = rxo->adapter;
2002         struct be_rx_page_info *rx_page_info;
2003         struct be_queue_info *rxq = &rxo->q;
2004         u32 frag_idx = rxq->tail;
2005
2006         rx_page_info = &rxo->page_info_tbl[frag_idx];
2007         BUG_ON(!rx_page_info->page);
2008
2009         if (rx_page_info->last_frag) {
2010                 dma_unmap_page(&adapter->pdev->dev,
2011                                dma_unmap_addr(rx_page_info, bus),
2012                                adapter->big_page_size, DMA_FROM_DEVICE);
2013                 rx_page_info->last_frag = false;
2014         } else {
2015                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2016                                         dma_unmap_addr(rx_page_info, bus),
2017                                         rx_frag_size, DMA_FROM_DEVICE);
2018         }
2019
2020         queue_tail_inc(rxq);
2021         atomic_dec(&rxq->used);
2022         return rx_page_info;
2023 }
2024
2025 /* Throwaway the data in the Rx completion */
2026 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2027                                 struct be_rx_compl_info *rxcp)
2028 {
2029         struct be_rx_page_info *page_info;
2030         u16 i, num_rcvd = rxcp->num_rcvd;
2031
2032         for (i = 0; i < num_rcvd; i++) {
2033                 page_info = get_rx_page_info(rxo);
2034                 put_page(page_info->page);
2035                 memset(page_info, 0, sizeof(*page_info));
2036         }
2037 }
2038
2039 /*
2040  * skb_fill_rx_data forms a complete skb for an ether frame
2041  * indicated by rxcp.
2042  */
2043 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2044                              struct be_rx_compl_info *rxcp)
2045 {
2046         struct be_rx_page_info *page_info;
2047         u16 i, j;
2048         u16 hdr_len, curr_frag_len, remaining;
2049         u8 *start;
2050
2051         page_info = get_rx_page_info(rxo);
2052         start = page_address(page_info->page) + page_info->page_offset;
2053         prefetch(start);
2054
2055         /* Copy data in the first descriptor of this completion */
2056         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2057
2058         skb->len = curr_frag_len;
2059         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2060                 memcpy(skb->data, start, curr_frag_len);
2061                 /* Complete packet has now been moved to data */
2062                 put_page(page_info->page);
2063                 skb->data_len = 0;
2064                 skb->tail += curr_frag_len;
2065         } else {
2066                 hdr_len = ETH_HLEN;
2067                 memcpy(skb->data, start, hdr_len);
2068                 skb_shinfo(skb)->nr_frags = 1;
2069                 skb_frag_set_page(skb, 0, page_info->page);
2070                 skb_shinfo(skb)->frags[0].page_offset =
2071                                         page_info->page_offset + hdr_len;
2072                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2073                                   curr_frag_len - hdr_len);
2074                 skb->data_len = curr_frag_len - hdr_len;
2075                 skb->truesize += rx_frag_size;
2076                 skb->tail += hdr_len;
2077         }
2078         page_info->page = NULL;
2079
2080         if (rxcp->pkt_size <= rx_frag_size) {
2081                 BUG_ON(rxcp->num_rcvd != 1);
2082                 return;
2083         }
2084
2085         /* More frags present for this completion */
2086         remaining = rxcp->pkt_size - curr_frag_len;
2087         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2088                 page_info = get_rx_page_info(rxo);
2089                 curr_frag_len = min(remaining, rx_frag_size);
2090
2091                 /* Coalesce all frags from the same physical page in one slot */
2092                 if (page_info->page_offset == 0) {
2093                         /* Fresh page */
2094                         j++;
2095                         skb_frag_set_page(skb, j, page_info->page);
2096                         skb_shinfo(skb)->frags[j].page_offset =
2097                                                         page_info->page_offset;
2098                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2099                         skb_shinfo(skb)->nr_frags++;
2100                 } else {
2101                         put_page(page_info->page);
2102                 }
2103
2104                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2105                 skb->len += curr_frag_len;
2106                 skb->data_len += curr_frag_len;
2107                 skb->truesize += rx_frag_size;
2108                 remaining -= curr_frag_len;
2109                 page_info->page = NULL;
2110         }
2111         BUG_ON(j > MAX_SKB_FRAGS);
2112 }
2113
2114 /* Process the RX completion indicated by rxcp when GRO is disabled */
2115 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2116                                 struct be_rx_compl_info *rxcp)
2117 {
2118         struct be_adapter *adapter = rxo->adapter;
2119         struct net_device *netdev = adapter->netdev;
2120         struct sk_buff *skb;
2121
2122         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2123         if (unlikely(!skb)) {
2124                 rx_stats(rxo)->rx_drops_no_skbs++;
2125                 be_rx_compl_discard(rxo, rxcp);
2126                 return;
2127         }
2128
2129         skb_fill_rx_data(rxo, skb, rxcp);
2130
2131         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2132                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2133         else
2134                 skb_checksum_none_assert(skb);
2135
2136         skb->protocol = eth_type_trans(skb, netdev);
2137         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2138         if (netdev->features & NETIF_F_RXHASH)
2139                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2140
2141         skb->csum_level = rxcp->tunneled;
2142         skb_mark_napi_id(skb, napi);
2143
2144         if (rxcp->vlanf)
2145                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2146
2147         netif_receive_skb(skb);
2148 }
2149
2150 /* Process the RX completion indicated by rxcp when GRO is enabled */
2151 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2152                                     struct napi_struct *napi,
2153                                     struct be_rx_compl_info *rxcp)
2154 {
2155         struct be_adapter *adapter = rxo->adapter;
2156         struct be_rx_page_info *page_info;
2157         struct sk_buff *skb = NULL;
2158         u16 remaining, curr_frag_len;
2159         u16 i, j;
2160
2161         skb = napi_get_frags(napi);
2162         if (!skb) {
2163                 be_rx_compl_discard(rxo, rxcp);
2164                 return;
2165         }
2166
2167         remaining = rxcp->pkt_size;
2168         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2169                 page_info = get_rx_page_info(rxo);
2170
2171                 curr_frag_len = min(remaining, rx_frag_size);
2172
2173                 /* Coalesce all frags from the same physical page in one slot */
2174                 if (i == 0 || page_info->page_offset == 0) {
2175                         /* First frag or Fresh page */
2176                         j++;
2177                         skb_frag_set_page(skb, j, page_info->page);
2178                         skb_shinfo(skb)->frags[j].page_offset =
2179                                                         page_info->page_offset;
2180                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2181                 } else {
2182                         put_page(page_info->page);
2183                 }
2184                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2185                 skb->truesize += rx_frag_size;
2186                 remaining -= curr_frag_len;
2187                 memset(page_info, 0, sizeof(*page_info));
2188         }
2189         BUG_ON(j > MAX_SKB_FRAGS);
2190
2191         skb_shinfo(skb)->nr_frags = j + 1;
2192         skb->len = rxcp->pkt_size;
2193         skb->data_len = rxcp->pkt_size;
2194         skb->ip_summed = CHECKSUM_UNNECESSARY;
2195         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2196         if (adapter->netdev->features & NETIF_F_RXHASH)
2197                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2198
2199         skb->csum_level = rxcp->tunneled;
2200
2201         if (rxcp->vlanf)
2202                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2203
2204         napi_gro_frags(napi);
2205 }
2206
2207 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2208                                  struct be_rx_compl_info *rxcp)
2209 {
2210         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2211         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2212         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2213         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2214         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2215         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2216         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2217         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2218         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2219         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2220         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2221         if (rxcp->vlanf) {
2222                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2223                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2224         }
2225         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2226         rxcp->tunneled =
2227                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2228 }
2229
2230 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2231                                  struct be_rx_compl_info *rxcp)
2232 {
2233         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2234         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2235         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2236         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2237         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2238         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2239         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2240         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2241         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2242         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2243         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2244         if (rxcp->vlanf) {
2245                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2246                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2247         }
2248         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2249         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2250 }
2251
2252 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2253 {
2254         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2255         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2256         struct be_adapter *adapter = rxo->adapter;
2257
2258         /* For checking the valid bit it is Ok to use either definition as the
2259          * valid bit is at the same position in both v0 and v1 Rx compl */
2260         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2261                 return NULL;
2262
2263         rmb();
2264         be_dws_le_to_cpu(compl, sizeof(*compl));
2265
2266         if (adapter->be3_native)
2267                 be_parse_rx_compl_v1(compl, rxcp);
2268         else
2269                 be_parse_rx_compl_v0(compl, rxcp);
2270
2271         if (rxcp->ip_frag)
2272                 rxcp->l4_csum = 0;
2273
2274         if (rxcp->vlanf) {
2275                 /* In QNQ modes, if qnq bit is not set, then the packet was
2276                  * tagged only with the transparent outer vlan-tag and must
2277                  * not be treated as a vlan packet by host
2278                  */
2279                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2280                         rxcp->vlanf = 0;
2281
2282                 if (!lancer_chip(adapter))
2283                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2284
2285                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2286                     !test_bit(rxcp->vlan_tag, adapter->vids))
2287                         rxcp->vlanf = 0;
2288         }
2289
2290         /* As the compl has been parsed, reset it; we wont touch it again */
2291         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2292
2293         queue_tail_inc(&rxo->cq);
2294         return rxcp;
2295 }
2296
2297 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2298 {
2299         u32 order = get_order(size);
2300
2301         if (order > 0)
2302                 gfp |= __GFP_COMP;
2303         return  alloc_pages(gfp, order);
2304 }
2305
2306 /*
2307  * Allocate a page, split it to fragments of size rx_frag_size and post as
2308  * receive buffers to BE
2309  */
2310 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2311 {
2312         struct be_adapter *adapter = rxo->adapter;
2313         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2314         struct be_queue_info *rxq = &rxo->q;
2315         struct page *pagep = NULL;
2316         struct device *dev = &adapter->pdev->dev;
2317         struct be_eth_rx_d *rxd;
2318         u64 page_dmaaddr = 0, frag_dmaaddr;
2319         u32 posted, page_offset = 0, notify = 0;
2320
2321         page_info = &rxo->page_info_tbl[rxq->head];
2322         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2323                 if (!pagep) {
2324                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2325                         if (unlikely(!pagep)) {
2326                                 rx_stats(rxo)->rx_post_fail++;
2327                                 break;
2328                         }
2329                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2330                                                     adapter->big_page_size,
2331                                                     DMA_FROM_DEVICE);
2332                         if (dma_mapping_error(dev, page_dmaaddr)) {
2333                                 put_page(pagep);
2334                                 pagep = NULL;
2335                                 adapter->drv_stats.dma_map_errors++;
2336                                 break;
2337                         }
2338                         page_offset = 0;
2339                 } else {
2340                         get_page(pagep);
2341                         page_offset += rx_frag_size;
2342                 }
2343                 page_info->page_offset = page_offset;
2344                 page_info->page = pagep;
2345
2346                 rxd = queue_head_node(rxq);
2347                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2348                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2349                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2350
2351                 /* Any space left in the current big page for another frag? */
2352                 if ((page_offset + rx_frag_size + rx_frag_size) >
2353                                         adapter->big_page_size) {
2354                         pagep = NULL;
2355                         page_info->last_frag = true;
2356                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2357                 } else {
2358                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2359                 }
2360
2361                 prev_page_info = page_info;
2362                 queue_head_inc(rxq);
2363                 page_info = &rxo->page_info_tbl[rxq->head];
2364         }
2365
2366         /* Mark the last frag of a page when we break out of the above loop
2367          * with no more slots available in the RXQ
2368          */
2369         if (pagep) {
2370                 prev_page_info->last_frag = true;
2371                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2372         }
2373
2374         if (posted) {
2375                 atomic_add(posted, &rxq->used);
2376                 if (rxo->rx_post_starved)
2377                         rxo->rx_post_starved = false;
2378                 do {
2379                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2380                         be_rxq_notify(adapter, rxq->id, notify);
2381                         posted -= notify;
2382                 } while (posted);
2383         } else if (atomic_read(&rxq->used) == 0) {
2384                 /* Let be_worker replenish when memory is available */
2385                 rxo->rx_post_starved = true;
2386         }
2387 }
2388
2389 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2390 {
2391         struct be_queue_info *tx_cq = &txo->cq;
2392         struct be_tx_compl_info *txcp = &txo->txcp;
2393         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2394
2395         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2396                 return NULL;
2397
2398         /* Ensure load ordering of valid bit dword and other dwords below */
2399         rmb();
2400         be_dws_le_to_cpu(compl, sizeof(*compl));
2401
2402         txcp->status = GET_TX_COMPL_BITS(status, compl);
2403         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2404
2405         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2406         queue_tail_inc(tx_cq);
2407         return txcp;
2408 }
2409
2410 static u16 be_tx_compl_process(struct be_adapter *adapter,
2411                                struct be_tx_obj *txo, u16 last_index)
2412 {
2413         struct sk_buff **sent_skbs = txo->sent_skb_list;
2414         struct be_queue_info *txq = &txo->q;
2415         struct sk_buff *skb = NULL;
2416         bool unmap_skb_hdr = false;
2417         struct be_eth_wrb *wrb;
2418         u16 num_wrbs = 0;
2419         u32 frag_index;
2420
2421         do {
2422                 if (sent_skbs[txq->tail]) {
2423                         /* Free skb from prev req */
2424                         if (skb)
2425                                 dev_consume_skb_any(skb);
2426                         skb = sent_skbs[txq->tail];
2427                         sent_skbs[txq->tail] = NULL;
2428                         queue_tail_inc(txq);  /* skip hdr wrb */
2429                         num_wrbs++;
2430                         unmap_skb_hdr = true;
2431                 }
2432                 wrb = queue_tail_node(txq);
2433                 frag_index = txq->tail;
2434                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2435                               (unmap_skb_hdr && skb_headlen(skb)));
2436                 unmap_skb_hdr = false;
2437                 queue_tail_inc(txq);
2438                 num_wrbs++;
2439         } while (frag_index != last_index);
2440         dev_consume_skb_any(skb);
2441
2442         return num_wrbs;
2443 }
2444
2445 /* Return the number of events in the event queue */
2446 static inline int events_get(struct be_eq_obj *eqo)
2447 {
2448         struct be_eq_entry *eqe;
2449         int num = 0;
2450
2451         do {
2452                 eqe = queue_tail_node(&eqo->q);
2453                 if (eqe->evt == 0)
2454                         break;
2455
2456                 rmb();
2457                 eqe->evt = 0;
2458                 num++;
2459                 queue_tail_inc(&eqo->q);
2460         } while (true);
2461
2462         return num;
2463 }
2464
2465 /* Leaves the EQ is disarmed state */
2466 static void be_eq_clean(struct be_eq_obj *eqo)
2467 {
2468         int num = events_get(eqo);
2469
2470         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2471 }
2472
2473 /* Free posted rx buffers that were not used */
2474 static void be_rxq_clean(struct be_rx_obj *rxo)
2475 {
2476         struct be_queue_info *rxq = &rxo->q;
2477         struct be_rx_page_info *page_info;
2478
2479         while (atomic_read(&rxq->used) > 0) {
2480                 page_info = get_rx_page_info(rxo);
2481                 put_page(page_info->page);
2482                 memset(page_info, 0, sizeof(*page_info));
2483         }
2484         BUG_ON(atomic_read(&rxq->used));
2485         rxq->tail = 0;
2486         rxq->head = 0;
2487 }
2488
2489 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2490 {
2491         struct be_queue_info *rx_cq = &rxo->cq;
2492         struct be_rx_compl_info *rxcp;
2493         struct be_adapter *adapter = rxo->adapter;
2494         int flush_wait = 0;
2495
2496         /* Consume pending rx completions.
2497          * Wait for the flush completion (identified by zero num_rcvd)
2498          * to arrive. Notify CQ even when there are no more CQ entries
2499          * for HW to flush partially coalesced CQ entries.
2500          * In Lancer, there is no need to wait for flush compl.
2501          */
2502         for (;;) {
2503                 rxcp = be_rx_compl_get(rxo);
2504                 if (!rxcp) {
2505                         if (lancer_chip(adapter))
2506                                 break;
2507
2508                         if (flush_wait++ > 50 ||
2509                             be_check_error(adapter,
2510                                            BE_ERROR_HW)) {
2511                                 dev_warn(&adapter->pdev->dev,
2512                                          "did not receive flush compl\n");
2513                                 break;
2514                         }
2515                         be_cq_notify(adapter, rx_cq->id, true, 0);
2516                         mdelay(1);
2517                 } else {
2518                         be_rx_compl_discard(rxo, rxcp);
2519                         be_cq_notify(adapter, rx_cq->id, false, 1);
2520                         if (rxcp->num_rcvd == 0)
2521                                 break;
2522                 }
2523         }
2524
2525         /* After cleanup, leave the CQ in unarmed state */
2526         be_cq_notify(adapter, rx_cq->id, false, 0);
2527 }
2528
2529 static void be_tx_compl_clean(struct be_adapter *adapter)
2530 {
2531         struct device *dev = &adapter->pdev->dev;
2532         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2533         struct be_tx_compl_info *txcp;
2534         struct be_queue_info *txq;
2535         u32 end_idx, notified_idx;
2536         struct be_tx_obj *txo;
2537         int i, pending_txqs;
2538
2539         /* Stop polling for compls when HW has been silent for 10ms */
2540         do {
2541                 pending_txqs = adapter->num_tx_qs;
2542
2543                 for_all_tx_queues(adapter, txo, i) {
2544                         cmpl = 0;
2545                         num_wrbs = 0;
2546                         txq = &txo->q;
2547                         while ((txcp = be_tx_compl_get(txo))) {
2548                                 num_wrbs +=
2549                                         be_tx_compl_process(adapter, txo,
2550                                                             txcp->end_index);
2551                                 cmpl++;
2552                         }
2553                         if (cmpl) {
2554                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2555                                 atomic_sub(num_wrbs, &txq->used);
2556                                 timeo = 0;
2557                         }
2558                         if (!be_is_tx_compl_pending(txo))
2559                                 pending_txqs--;
2560                 }
2561
2562                 if (pending_txqs == 0 || ++timeo > 10 ||
2563                     be_check_error(adapter, BE_ERROR_HW))
2564                         break;
2565
2566                 mdelay(1);
2567         } while (true);
2568
2569         /* Free enqueued TX that was never notified to HW */
2570         for_all_tx_queues(adapter, txo, i) {
2571                 txq = &txo->q;
2572
2573                 if (atomic_read(&txq->used)) {
2574                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2575                                  i, atomic_read(&txq->used));
2576                         notified_idx = txq->tail;
2577                         end_idx = txq->tail;
2578                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2579                                   txq->len);
2580                         /* Use the tx-compl process logic to handle requests
2581                          * that were not sent to the HW.
2582                          */
2583                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2584                         atomic_sub(num_wrbs, &txq->used);
2585                         BUG_ON(atomic_read(&txq->used));
2586                         txo->pend_wrb_cnt = 0;
2587                         /* Since hw was never notified of these requests,
2588                          * reset TXQ indices
2589                          */
2590                         txq->head = notified_idx;
2591                         txq->tail = notified_idx;
2592                 }
2593         }
2594 }
2595
2596 static void be_evt_queues_destroy(struct be_adapter *adapter)
2597 {
2598         struct be_eq_obj *eqo;
2599         int i;
2600
2601         for_all_evt_queues(adapter, eqo, i) {
2602                 if (eqo->q.created) {
2603                         be_eq_clean(eqo);
2604                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2605                         napi_hash_del(&eqo->napi);
2606                         netif_napi_del(&eqo->napi);
2607                         free_cpumask_var(eqo->affinity_mask);
2608                 }
2609                 be_queue_free(adapter, &eqo->q);
2610         }
2611 }
2612
2613 static int be_evt_queues_create(struct be_adapter *adapter)
2614 {
2615         struct be_queue_info *eq;
2616         struct be_eq_obj *eqo;
2617         struct be_aic_obj *aic;
2618         int i, rc;
2619
2620         /* need enough EQs to service both RX and TX queues */
2621         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2622                                     max(adapter->cfg_num_rx_irqs,
2623                                         adapter->cfg_num_tx_irqs));
2624
2625         for_all_evt_queues(adapter, eqo, i) {
2626                 int numa_node = dev_to_node(&adapter->pdev->dev);
2627
2628                 aic = &adapter->aic_obj[i];
2629                 eqo->adapter = adapter;
2630                 eqo->idx = i;
2631                 aic->max_eqd = BE_MAX_EQD;
2632                 aic->enable = true;
2633
2634                 eq = &eqo->q;
2635                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2636                                     sizeof(struct be_eq_entry));
2637                 if (rc)
2638                         return rc;
2639
2640                 rc = be_cmd_eq_create(adapter, eqo);
2641                 if (rc)
2642                         return rc;
2643
2644                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2645                         return -ENOMEM;
2646                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2647                                 eqo->affinity_mask);
2648                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2649                                BE_NAPI_WEIGHT);
2650         }
2651         return 0;
2652 }
2653
2654 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2655 {
2656         struct be_queue_info *q;
2657
2658         q = &adapter->mcc_obj.q;
2659         if (q->created)
2660                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2661         be_queue_free(adapter, q);
2662
2663         q = &adapter->mcc_obj.cq;
2664         if (q->created)
2665                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2666         be_queue_free(adapter, q);
2667 }
2668
2669 /* Must be called only after TX qs are created as MCC shares TX EQ */
2670 static int be_mcc_queues_create(struct be_adapter *adapter)
2671 {
2672         struct be_queue_info *q, *cq;
2673
2674         cq = &adapter->mcc_obj.cq;
2675         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2676                            sizeof(struct be_mcc_compl)))
2677                 goto err;
2678
2679         /* Use the default EQ for MCC completions */
2680         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2681                 goto mcc_cq_free;
2682
2683         q = &adapter->mcc_obj.q;
2684         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2685                 goto mcc_cq_destroy;
2686
2687         if (be_cmd_mccq_create(adapter, q, cq))
2688                 goto mcc_q_free;
2689
2690         return 0;
2691
2692 mcc_q_free:
2693         be_queue_free(adapter, q);
2694 mcc_cq_destroy:
2695         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2696 mcc_cq_free:
2697         be_queue_free(adapter, cq);
2698 err:
2699         return -1;
2700 }
2701
2702 static void be_tx_queues_destroy(struct be_adapter *adapter)
2703 {
2704         struct be_queue_info *q;
2705         struct be_tx_obj *txo;
2706         u8 i;
2707
2708         for_all_tx_queues(adapter, txo, i) {
2709                 q = &txo->q;
2710                 if (q->created)
2711                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2712                 be_queue_free(adapter, q);
2713
2714                 q = &txo->cq;
2715                 if (q->created)
2716                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2717                 be_queue_free(adapter, q);
2718         }
2719 }
2720
2721 static int be_tx_qs_create(struct be_adapter *adapter)
2722 {
2723         struct be_queue_info *cq;
2724         struct be_tx_obj *txo;
2725         struct be_eq_obj *eqo;
2726         int status, i;
2727
2728         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2729
2730         for_all_tx_queues(adapter, txo, i) {
2731                 cq = &txo->cq;
2732                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2733                                         sizeof(struct be_eth_tx_compl));
2734                 if (status)
2735                         return status;
2736
2737                 u64_stats_init(&txo->stats.sync);
2738                 u64_stats_init(&txo->stats.sync_compl);
2739
2740                 /* If num_evt_qs is less than num_tx_qs, then more than
2741                  * one txq share an eq
2742                  */
2743                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2744                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2745                 if (status)
2746                         return status;
2747
2748                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2749                                         sizeof(struct be_eth_wrb));
2750                 if (status)
2751                         return status;
2752
2753                 status = be_cmd_txq_create(adapter, txo);
2754                 if (status)
2755                         return status;
2756
2757                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2758                                     eqo->idx);
2759         }
2760
2761         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2762                  adapter->num_tx_qs);
2763         return 0;
2764 }
2765
2766 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2767 {
2768         struct be_queue_info *q;
2769         struct be_rx_obj *rxo;
2770         int i;
2771
2772         for_all_rx_queues(adapter, rxo, i) {
2773                 q = &rxo->cq;
2774                 if (q->created)
2775                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2776                 be_queue_free(adapter, q);
2777         }
2778 }
2779
2780 static int be_rx_cqs_create(struct be_adapter *adapter)
2781 {
2782         struct be_queue_info *eq, *cq;
2783         struct be_rx_obj *rxo;
2784         int rc, i;
2785
2786         adapter->num_rss_qs =
2787                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2788
2789         /* We'll use RSS only if atleast 2 RSS rings are supported. */
2790         if (adapter->num_rss_qs < 2)
2791                 adapter->num_rss_qs = 0;
2792
2793         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2794
2795         /* When the interface is not capable of RSS rings (and there is no
2796          * need to create a default RXQ) we'll still need one RXQ
2797          */
2798         if (adapter->num_rx_qs == 0)
2799                 adapter->num_rx_qs = 1;
2800
2801         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2802         for_all_rx_queues(adapter, rxo, i) {
2803                 rxo->adapter = adapter;
2804                 cq = &rxo->cq;
2805                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2806                                     sizeof(struct be_eth_rx_compl));
2807                 if (rc)
2808                         return rc;
2809
2810                 u64_stats_init(&rxo->stats.sync);
2811                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
2812                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
2813                 if (rc)
2814                         return rc;
2815         }
2816
2817         dev_info(&adapter->pdev->dev,
2818                  "created %d RX queue(s)\n", adapter->num_rx_qs);
2819         return 0;
2820 }
2821
2822 static irqreturn_t be_intx(int irq, void *dev)
2823 {
2824         struct be_eq_obj *eqo = dev;
2825         struct be_adapter *adapter = eqo->adapter;
2826         int num_evts = 0;
2827
2828         /* IRQ is not expected when NAPI is scheduled as the EQ
2829          * will not be armed.
2830          * But, this can happen on Lancer INTx where it takes
2831          * a while to de-assert INTx or in BE2 where occasionaly
2832          * an interrupt may be raised even when EQ is unarmed.
2833          * If NAPI is already scheduled, then counting & notifying
2834          * events will orphan them.
2835          */
2836         if (napi_schedule_prep(&eqo->napi)) {
2837                 num_evts = events_get(eqo);
2838                 __napi_schedule(&eqo->napi);
2839                 if (num_evts)
2840                         eqo->spurious_intr = 0;
2841         }
2842         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
2843
2844         /* Return IRQ_HANDLED only for the the first spurious intr
2845          * after a valid intr to stop the kernel from branding
2846          * this irq as a bad one!
2847          */
2848         if (num_evts || eqo->spurious_intr++ == 0)
2849                 return IRQ_HANDLED;
2850         else
2851                 return IRQ_NONE;
2852 }
2853
2854 static irqreturn_t be_msix(int irq, void *dev)
2855 {
2856         struct be_eq_obj *eqo = dev;
2857
2858         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
2859         napi_schedule(&eqo->napi);
2860         return IRQ_HANDLED;
2861 }
2862
2863 static inline bool do_gro(struct be_rx_compl_info *rxcp)
2864 {
2865         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
2866 }
2867
2868 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
2869                          int budget, int polling)
2870 {
2871         struct be_adapter *adapter = rxo->adapter;
2872         struct be_queue_info *rx_cq = &rxo->cq;
2873         struct be_rx_compl_info *rxcp;
2874         u32 work_done;
2875         u32 frags_consumed = 0;
2876
2877         for (work_done = 0; work_done < budget; work_done++) {
2878                 rxcp = be_rx_compl_get(rxo);
2879                 if (!rxcp)
2880                         break;
2881
2882                 /* Is it a flush compl that has no data */
2883                 if (unlikely(rxcp->num_rcvd == 0))
2884                         goto loop_continue;
2885
2886                 /* Discard compl with partial DMA Lancer B0 */
2887                 if (unlikely(!rxcp->pkt_size)) {
2888                         be_rx_compl_discard(rxo, rxcp);
2889                         goto loop_continue;
2890                 }
2891
2892                 /* On BE drop pkts that arrive due to imperfect filtering in
2893                  * promiscuous mode on some skews
2894                  */
2895                 if (unlikely(rxcp->port != adapter->port_num &&
2896                              !lancer_chip(adapter))) {
2897                         be_rx_compl_discard(rxo, rxcp);
2898                         goto loop_continue;
2899                 }
2900
2901                 /* Don't do gro when we're busy_polling */
2902                 if (do_gro(rxcp) && polling != BUSY_POLLING)
2903                         be_rx_compl_process_gro(rxo, napi, rxcp);
2904                 else
2905                         be_rx_compl_process(rxo, napi, rxcp);
2906
2907 loop_continue:
2908                 frags_consumed += rxcp->num_rcvd;
2909                 be_rx_stats_update(rxo, rxcp);
2910         }
2911
2912         if (work_done) {
2913                 be_cq_notify(adapter, rx_cq->id, true, work_done);
2914
2915                 /* When an rx-obj gets into post_starved state, just
2916                  * let be_worker do the posting.
2917                  */
2918                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
2919                     !rxo->rx_post_starved)
2920                         be_post_rx_frags(rxo, GFP_ATOMIC,
2921                                          max_t(u32, MAX_RX_POST,
2922                                                frags_consumed));
2923         }
2924
2925         return work_done;
2926 }
2927
2928 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2929 {
2930         switch (status) {
2931         case BE_TX_COMP_HDR_PARSE_ERR:
2932                 tx_stats(txo)->tx_hdr_parse_err++;
2933                 break;
2934         case BE_TX_COMP_NDMA_ERR:
2935                 tx_stats(txo)->tx_dma_err++;
2936                 break;
2937         case BE_TX_COMP_ACL_ERR:
2938                 tx_stats(txo)->tx_spoof_check_err++;
2939                 break;
2940         }
2941 }
2942
2943 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2944 {
2945         switch (status) {
2946         case LANCER_TX_COMP_LSO_ERR:
2947                 tx_stats(txo)->tx_tso_err++;
2948                 break;
2949         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2950         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2951                 tx_stats(txo)->tx_spoof_check_err++;
2952                 break;
2953         case LANCER_TX_COMP_QINQ_ERR:
2954                 tx_stats(txo)->tx_qinq_err++;
2955                 break;
2956         case LANCER_TX_COMP_PARITY_ERR:
2957                 tx_stats(txo)->tx_internal_parity_err++;
2958                 break;
2959         case LANCER_TX_COMP_DMA_ERR:
2960                 tx_stats(txo)->tx_dma_err++;
2961                 break;
2962         }
2963 }
2964
2965 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
2966                           int idx)
2967 {
2968         int num_wrbs = 0, work_done = 0;
2969         struct be_tx_compl_info *txcp;
2970
2971         while ((txcp = be_tx_compl_get(txo))) {
2972                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
2973                 work_done++;
2974
2975                 if (txcp->status) {
2976                         if (lancer_chip(adapter))
2977                                 lancer_update_tx_err(txo, txcp->status);
2978                         else
2979                                 be_update_tx_err(txo, txcp->status);
2980                 }
2981         }
2982
2983         if (work_done) {
2984                 be_cq_notify(adapter, txo->cq.id, true, work_done);
2985                 atomic_sub(num_wrbs, &txo->q.used);
2986
2987                 /* As Tx wrbs have been freed up, wake up netdev queue
2988                  * if it was stopped due to lack of tx wrbs.  */
2989                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
2990                     be_can_txq_wake(txo)) {
2991                         netif_wake_subqueue(adapter->netdev, idx);
2992                 }
2993
2994                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
2995                 tx_stats(txo)->tx_compl += work_done;
2996                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
2997         }
2998 }
2999
3000 #ifdef CONFIG_NET_RX_BUSY_POLL
3001 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3002 {
3003         bool status = true;
3004
3005         spin_lock(&eqo->lock); /* BH is already disabled */
3006         if (eqo->state & BE_EQ_LOCKED) {
3007                 WARN_ON(eqo->state & BE_EQ_NAPI);
3008                 eqo->state |= BE_EQ_NAPI_YIELD;
3009                 status = false;
3010         } else {
3011                 eqo->state = BE_EQ_NAPI;
3012         }
3013         spin_unlock(&eqo->lock);
3014         return status;
3015 }
3016
3017 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3018 {
3019         spin_lock(&eqo->lock); /* BH is already disabled */
3020
3021         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3022         eqo->state = BE_EQ_IDLE;
3023
3024         spin_unlock(&eqo->lock);
3025 }
3026
3027 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3028 {
3029         bool status = true;
3030
3031         spin_lock_bh(&eqo->lock);
3032         if (eqo->state & BE_EQ_LOCKED) {
3033                 eqo->state |= BE_EQ_POLL_YIELD;
3034                 status = false;
3035         } else {
3036                 eqo->state |= BE_EQ_POLL;
3037         }
3038         spin_unlock_bh(&eqo->lock);
3039         return status;
3040 }
3041
3042 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3043 {
3044         spin_lock_bh(&eqo->lock);
3045
3046         WARN_ON(eqo->state & (BE_EQ_NAPI));
3047         eqo->state = BE_EQ_IDLE;
3048
3049         spin_unlock_bh(&eqo->lock);
3050 }
3051
3052 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3053 {
3054         spin_lock_init(&eqo->lock);
3055         eqo->state = BE_EQ_IDLE;
3056 }
3057
3058 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3059 {
3060         local_bh_disable();
3061
3062         /* It's enough to just acquire napi lock on the eqo to stop
3063          * be_busy_poll() from processing any queueus.
3064          */
3065         while (!be_lock_napi(eqo))
3066                 mdelay(1);
3067
3068         local_bh_enable();
3069 }
3070
3071 #else /* CONFIG_NET_RX_BUSY_POLL */
3072
3073 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3074 {
3075         return true;
3076 }
3077
3078 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3079 {
3080 }
3081
3082 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3083 {
3084         return false;
3085 }
3086
3087 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3088 {
3089 }
3090
3091 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3092 {
3093 }
3094
3095 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3096 {
3097 }
3098 #endif /* CONFIG_NET_RX_BUSY_POLL */
3099
3100 int be_poll(struct napi_struct *napi, int budget)
3101 {
3102         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3103         struct be_adapter *adapter = eqo->adapter;
3104         int max_work = 0, work, i, num_evts;
3105         struct be_rx_obj *rxo;
3106         struct be_tx_obj *txo;
3107         u32 mult_enc = 0;
3108
3109         num_evts = events_get(eqo);
3110
3111         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3112                 be_process_tx(adapter, txo, i);
3113
3114         if (be_lock_napi(eqo)) {
3115                 /* This loop will iterate twice for EQ0 in which
3116                  * completions of the last RXQ (default one) are also processed
3117                  * For other EQs the loop iterates only once
3118                  */
3119                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3120                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3121                         max_work = max(work, max_work);
3122                 }
3123                 be_unlock_napi(eqo);
3124         } else {
3125                 max_work = budget;
3126         }
3127
3128         if (is_mcc_eqo(eqo))
3129                 be_process_mcc(adapter);
3130
3131         if (max_work < budget) {
3132                 napi_complete(napi);
3133
3134                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3135                  * delay via a delay multiplier encoding value
3136                  */
3137                 if (skyhawk_chip(adapter))
3138                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3139
3140                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3141                              mult_enc);
3142         } else {
3143                 /* As we'll continue in polling mode, count and clear events */
3144                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3145         }
3146         return max_work;
3147 }
3148
3149 #ifdef CONFIG_NET_RX_BUSY_POLL
3150 static int be_busy_poll(struct napi_struct *napi)
3151 {
3152         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3153         struct be_adapter *adapter = eqo->adapter;
3154         struct be_rx_obj *rxo;
3155         int i, work = 0;
3156
3157         if (!be_lock_busy_poll(eqo))
3158                 return LL_FLUSH_BUSY;
3159
3160         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3161                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3162                 if (work)
3163                         break;
3164         }
3165
3166         be_unlock_busy_poll(eqo);
3167         return work;
3168 }
3169 #endif
3170
3171 void be_detect_error(struct be_adapter *adapter)
3172 {
3173         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3174         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3175         u32 i;
3176         struct device *dev = &adapter->pdev->dev;
3177
3178         if (be_check_error(adapter, BE_ERROR_HW))
3179                 return;
3180
3181         if (lancer_chip(adapter)) {
3182                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3183                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3184                         be_set_error(adapter, BE_ERROR_UE);
3185                         sliport_err1 = ioread32(adapter->db +
3186                                                 SLIPORT_ERROR1_OFFSET);
3187                         sliport_err2 = ioread32(adapter->db +
3188                                                 SLIPORT_ERROR2_OFFSET);
3189                         /* Do not log error messages if its a FW reset */
3190                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3191                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3192                                 dev_info(dev, "Firmware update in progress\n");
3193                         } else {
3194                                 dev_err(dev, "Error detected in the card\n");
3195                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3196                                         sliport_status);
3197                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3198                                         sliport_err1);
3199                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3200                                         sliport_err2);
3201                         }
3202                 }
3203         } else {
3204                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3205                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3206                 ue_lo_mask = ioread32(adapter->pcicfg +
3207                                       PCICFG_UE_STATUS_LOW_MASK);
3208                 ue_hi_mask = ioread32(adapter->pcicfg +
3209                                       PCICFG_UE_STATUS_HI_MASK);
3210
3211                 ue_lo = (ue_lo & ~ue_lo_mask);
3212                 ue_hi = (ue_hi & ~ue_hi_mask);
3213
3214                 /* On certain platforms BE hardware can indicate spurious UEs.
3215                  * Allow HW to stop working completely in case of a real UE.
3216                  * Hence not setting the hw_error for UE detection.
3217                  */
3218
3219                 if (ue_lo || ue_hi) {
3220                         dev_err(dev,
3221                                 "Unrecoverable Error detected in the adapter");
3222                         dev_err(dev, "Please reboot server to recover");
3223                         if (skyhawk_chip(adapter))
3224                                 be_set_error(adapter, BE_ERROR_UE);
3225
3226                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3227                                 if (ue_lo & 1)
3228                                         dev_err(dev, "UE: %s bit set\n",
3229                                                 ue_status_low_desc[i]);
3230                         }
3231                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3232                                 if (ue_hi & 1)
3233                                         dev_err(dev, "UE: %s bit set\n",
3234                                                 ue_status_hi_desc[i]);
3235                         }
3236                 }
3237         }
3238 }
3239
3240 static void be_msix_disable(struct be_adapter *adapter)
3241 {
3242         if (msix_enabled(adapter)) {
3243                 pci_disable_msix(adapter->pdev);
3244                 adapter->num_msix_vec = 0;
3245                 adapter->num_msix_roce_vec = 0;
3246         }
3247 }
3248
3249 static int be_msix_enable(struct be_adapter *adapter)
3250 {
3251         unsigned int i, max_roce_eqs;
3252         struct device *dev = &adapter->pdev->dev;
3253         int num_vec;
3254
3255         /* If RoCE is supported, program the max number of vectors that
3256          * could be used for NIC and RoCE, else, just program the number
3257          * we'll use initially.
3258          */
3259         if (be_roce_supported(adapter)) {
3260                 max_roce_eqs =
3261                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3262                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3263                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3264         } else {
3265                 num_vec = max(adapter->cfg_num_rx_irqs,
3266                               adapter->cfg_num_tx_irqs);
3267         }
3268
3269         for (i = 0; i < num_vec; i++)
3270                 adapter->msix_entries[i].entry = i;
3271
3272         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3273                                         MIN_MSIX_VECTORS, num_vec);
3274         if (num_vec < 0)
3275                 goto fail;
3276
3277         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3278                 adapter->num_msix_roce_vec = num_vec / 2;
3279                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3280                          adapter->num_msix_roce_vec);
3281         }
3282
3283         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3284
3285         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3286                  adapter->num_msix_vec);
3287         return 0;
3288
3289 fail:
3290         dev_warn(dev, "MSIx enable failed\n");
3291
3292         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3293         if (be_virtfn(adapter))
3294                 return num_vec;
3295         return 0;
3296 }
3297
3298 static inline int be_msix_vec_get(struct be_adapter *adapter,
3299                                   struct be_eq_obj *eqo)
3300 {
3301         return adapter->msix_entries[eqo->msix_idx].vector;
3302 }
3303
3304 static int be_msix_register(struct be_adapter *adapter)
3305 {
3306         struct net_device *netdev = adapter->netdev;
3307         struct be_eq_obj *eqo;
3308         int status, i, vec;
3309
3310         for_all_evt_queues(adapter, eqo, i) {
3311                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3312                 vec = be_msix_vec_get(adapter, eqo);
3313                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3314                 if (status)
3315                         goto err_msix;
3316
3317                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3318         }
3319
3320         return 0;
3321 err_msix:
3322         for (i--; i >= 0; i--) {
3323                 eqo = &adapter->eq_obj[i];
3324                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3325         }
3326         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3327                  status);
3328         be_msix_disable(adapter);
3329         return status;
3330 }
3331
3332 static int be_irq_register(struct be_adapter *adapter)
3333 {
3334         struct net_device *netdev = adapter->netdev;
3335         int status;
3336
3337         if (msix_enabled(adapter)) {
3338                 status = be_msix_register(adapter);
3339                 if (status == 0)
3340                         goto done;
3341                 /* INTx is not supported for VF */
3342                 if (be_virtfn(adapter))
3343                         return status;
3344         }
3345
3346         /* INTx: only the first EQ is used */
3347         netdev->irq = adapter->pdev->irq;
3348         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3349                              &adapter->eq_obj[0]);
3350         if (status) {
3351                 dev_err(&adapter->pdev->dev,
3352                         "INTx request IRQ failed - err %d\n", status);
3353                 return status;
3354         }
3355 done:
3356         adapter->isr_registered = true;
3357         return 0;
3358 }
3359
3360 static void be_irq_unregister(struct be_adapter *adapter)
3361 {
3362         struct net_device *netdev = adapter->netdev;
3363         struct be_eq_obj *eqo;
3364         int i, vec;
3365
3366         if (!adapter->isr_registered)
3367                 return;
3368
3369         /* INTx */
3370         if (!msix_enabled(adapter)) {
3371                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3372                 goto done;
3373         }
3374
3375         /* MSIx */
3376         for_all_evt_queues(adapter, eqo, i) {
3377                 vec = be_msix_vec_get(adapter, eqo);
3378                 irq_set_affinity_hint(vec, NULL);
3379                 free_irq(vec, eqo);
3380         }
3381
3382 done:
3383         adapter->isr_registered = false;
3384 }
3385
3386 static void be_rx_qs_destroy(struct be_adapter *adapter)
3387 {
3388         struct rss_info *rss = &adapter->rss_info;
3389         struct be_queue_info *q;
3390         struct be_rx_obj *rxo;
3391         int i;
3392
3393         for_all_rx_queues(adapter, rxo, i) {
3394                 q = &rxo->q;
3395                 if (q->created) {
3396                         /* If RXQs are destroyed while in an "out of buffer"
3397                          * state, there is a possibility of an HW stall on
3398                          * Lancer. So, post 64 buffers to each queue to relieve
3399                          * the "out of buffer" condition.
3400                          * Make sure there's space in the RXQ before posting.
3401                          */
3402                         if (lancer_chip(adapter)) {
3403                                 be_rx_cq_clean(rxo);
3404                                 if (atomic_read(&q->used) == 0)
3405                                         be_post_rx_frags(rxo, GFP_KERNEL,
3406                                                          MAX_RX_POST);
3407                         }
3408
3409                         be_cmd_rxq_destroy(adapter, q);
3410                         be_rx_cq_clean(rxo);
3411                         be_rxq_clean(rxo);
3412                 }
3413                 be_queue_free(adapter, q);
3414         }
3415
3416         if (rss->rss_flags) {
3417                 rss->rss_flags = RSS_ENABLE_NONE;
3418                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3419                                   128, rss->rss_hkey);
3420         }
3421 }
3422
3423 static void be_disable_if_filters(struct be_adapter *adapter)
3424 {
3425         be_cmd_pmac_del(adapter, adapter->if_handle,
3426                         adapter->pmac_id[0], 0);
3427
3428         be_clear_uc_list(adapter);
3429
3430         /* The IFACE flags are enabled in the open path and cleared
3431          * in the close path. When a VF gets detached from the host and
3432          * assigned to a VM the following happens:
3433          *      - VF's IFACE flags get cleared in the detach path
3434          *      - IFACE create is issued by the VF in the attach path
3435          * Due to a bug in the BE3/Skyhawk-R FW
3436          * (Lancer FW doesn't have the bug), the IFACE capability flags
3437          * specified along with the IFACE create cmd issued by a VF are not
3438          * honoured by FW.  As a consequence, if a *new* driver
3439          * (that enables/disables IFACE flags in open/close)
3440          * is loaded in the host and an *old* driver is * used by a VM/VF,
3441          * the IFACE gets created *without* the needed flags.
3442          * To avoid this, disable RX-filter flags only for Lancer.
3443          */
3444         if (lancer_chip(adapter)) {
3445                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3446                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3447         }
3448 }
3449
3450 static int be_close(struct net_device *netdev)
3451 {
3452         struct be_adapter *adapter = netdev_priv(netdev);
3453         struct be_eq_obj *eqo;
3454         int i;
3455
3456         /* This protection is needed as be_close() may be called even when the
3457          * adapter is in cleared state (after eeh perm failure)
3458          */
3459         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3460                 return 0;
3461
3462         be_disable_if_filters(adapter);
3463
3464         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3465                 for_all_evt_queues(adapter, eqo, i) {
3466                         napi_disable(&eqo->napi);
3467                         be_disable_busy_poll(eqo);
3468                 }
3469                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3470         }
3471
3472         be_async_mcc_disable(adapter);
3473
3474         /* Wait for all pending tx completions to arrive so that
3475          * all tx skbs are freed.
3476          */
3477         netif_tx_disable(netdev);
3478         be_tx_compl_clean(adapter);
3479
3480         be_rx_qs_destroy(adapter);
3481
3482         for_all_evt_queues(adapter, eqo, i) {
3483                 if (msix_enabled(adapter))
3484                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3485                 else
3486                         synchronize_irq(netdev->irq);
3487                 be_eq_clean(eqo);
3488         }
3489
3490         be_irq_unregister(adapter);
3491
3492         return 0;
3493 }
3494
3495 static int be_rx_qs_create(struct be_adapter *adapter)
3496 {
3497         struct rss_info *rss = &adapter->rss_info;
3498         u8 rss_key[RSS_HASH_KEY_LEN];
3499         struct be_rx_obj *rxo;
3500         int rc, i, j;
3501
3502         for_all_rx_queues(adapter, rxo, i) {
3503                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3504                                     sizeof(struct be_eth_rx_d));
3505                 if (rc)
3506                         return rc;
3507         }
3508
3509         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3510                 rxo = default_rxo(adapter);
3511                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3512                                        rx_frag_size, adapter->if_handle,
3513                                        false, &rxo->rss_id);
3514                 if (rc)
3515                         return rc;
3516         }
3517
3518         for_all_rss_queues(adapter, rxo, i) {
3519                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3520                                        rx_frag_size, adapter->if_handle,
3521                                        true, &rxo->rss_id);
3522                 if (rc)
3523                         return rc;
3524         }
3525
3526         if (be_multi_rxq(adapter)) {
3527                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3528                         for_all_rss_queues(adapter, rxo, i) {
3529                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3530                                         break;
3531                                 rss->rsstable[j + i] = rxo->rss_id;
3532                                 rss->rss_queue[j + i] = i;
3533                         }
3534                 }
3535                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3536                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3537
3538                 if (!BEx_chip(adapter))
3539                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3540                                 RSS_ENABLE_UDP_IPV6;
3541
3542                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3543                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3544                                        RSS_INDIR_TABLE_LEN, rss_key);
3545                 if (rc) {
3546                         rss->rss_flags = RSS_ENABLE_NONE;
3547                         return rc;
3548                 }
3549
3550                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3551         } else {
3552                 /* Disable RSS, if only default RX Q is created */
3553                 rss->rss_flags = RSS_ENABLE_NONE;
3554         }
3555
3556
3557         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3558          * which is a queue empty condition
3559          */
3560         for_all_rx_queues(adapter, rxo, i)
3561                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3562
3563         return 0;
3564 }
3565
3566 static int be_enable_if_filters(struct be_adapter *adapter)
3567 {
3568         int status;
3569
3570         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3571         if (status)
3572                 return status;
3573
3574         /* For BE3 VFs, the PF programs the initial MAC address */
3575         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3576                 status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
3577                                          adapter->if_handle,
3578                                          &adapter->pmac_id[0], 0);
3579                 if (status)
3580                         return status;
3581         }
3582
3583         if (adapter->vlans_added)
3584                 be_vid_config(adapter);
3585
3586         be_set_rx_mode(adapter->netdev);
3587
3588         return 0;
3589 }
3590
3591 static int be_open(struct net_device *netdev)
3592 {
3593         struct be_adapter *adapter = netdev_priv(netdev);
3594         struct be_eq_obj *eqo;
3595         struct be_rx_obj *rxo;
3596         struct be_tx_obj *txo;
3597         u8 link_status;
3598         int status, i;
3599
3600         status = be_rx_qs_create(adapter);
3601         if (status)
3602                 goto err;
3603
3604         status = be_enable_if_filters(adapter);
3605         if (status)
3606                 goto err;
3607
3608         status = be_irq_register(adapter);
3609         if (status)
3610                 goto err;
3611
3612         for_all_rx_queues(adapter, rxo, i)
3613                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3614
3615         for_all_tx_queues(adapter, txo, i)
3616                 be_cq_notify(adapter, txo->cq.id, true, 0);
3617
3618         be_async_mcc_enable(adapter);
3619
3620         for_all_evt_queues(adapter, eqo, i) {
3621                 napi_enable(&eqo->napi);
3622                 be_enable_busy_poll(eqo);
3623                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3624         }
3625         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3626
3627         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3628         if (!status)
3629                 be_link_status_update(adapter, link_status);
3630
3631         netif_tx_start_all_queues(netdev);
3632         if (skyhawk_chip(adapter))
3633                 udp_tunnel_get_rx_info(netdev);
3634
3635         return 0;
3636 err:
3637         be_close(adapter->netdev);
3638         return -EIO;
3639 }
3640
3641 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3642 {
3643         u32 addr;
3644
3645         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3646
3647         mac[5] = (u8)(addr & 0xFF);
3648         mac[4] = (u8)((addr >> 8) & 0xFF);
3649         mac[3] = (u8)((addr >> 16) & 0xFF);
3650         /* Use the OUI from the current MAC address */
3651         memcpy(mac, adapter->netdev->dev_addr, 3);
3652 }
3653
3654 /*
3655  * Generate a seed MAC address from the PF MAC Address using jhash.
3656  * MAC Address for VFs are assigned incrementally starting from the seed.
3657  * These addresses are programmed in the ASIC by the PF and the VF driver
3658  * queries for the MAC address during its probe.
3659  */
3660 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3661 {
3662         u32 vf;
3663         int status = 0;
3664         u8 mac[ETH_ALEN];
3665         struct be_vf_cfg *vf_cfg;
3666
3667         be_vf_eth_addr_generate(adapter, mac);
3668
3669         for_all_vfs(adapter, vf_cfg, vf) {
3670                 if (BEx_chip(adapter))
3671                         status = be_cmd_pmac_add(adapter, mac,
3672                                                  vf_cfg->if_handle,
3673                                                  &vf_cfg->pmac_id, vf + 1);
3674                 else
3675                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3676                                                 vf + 1);
3677
3678                 if (status)
3679                         dev_err(&adapter->pdev->dev,
3680                                 "Mac address assignment failed for VF %d\n",
3681                                 vf);
3682                 else
3683                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3684
3685                 mac[5] += 1;
3686         }
3687         return status;
3688 }
3689
3690 static int be_vfs_mac_query(struct be_adapter *adapter)
3691 {
3692         int status, vf;
3693         u8 mac[ETH_ALEN];
3694         struct be_vf_cfg *vf_cfg;
3695
3696         for_all_vfs(adapter, vf_cfg, vf) {
3697                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3698                                                mac, vf_cfg->if_handle,
3699                                                false, vf+1);
3700                 if (status)
3701                         return status;
3702                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3703         }
3704         return 0;
3705 }
3706
3707 static void be_vf_clear(struct be_adapter *adapter)
3708 {
3709         struct be_vf_cfg *vf_cfg;
3710         u32 vf;
3711
3712         if (pci_vfs_assigned(adapter->pdev)) {
3713                 dev_warn(&adapter->pdev->dev,
3714                          "VFs are assigned to VMs: not disabling VFs\n");
3715                 goto done;
3716         }
3717
3718         pci_disable_sriov(adapter->pdev);
3719
3720         for_all_vfs(adapter, vf_cfg, vf) {
3721                 if (BEx_chip(adapter))
3722                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3723                                         vf_cfg->pmac_id, vf + 1);
3724                 else
3725                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3726                                        vf + 1);
3727
3728                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3729         }
3730
3731         if (BE3_chip(adapter))
3732                 be_cmd_set_hsw_config(adapter, 0, 0,
3733                                       adapter->if_handle,
3734                                       PORT_FWD_TYPE_PASSTHRU, 0);
3735 done:
3736         kfree(adapter->vf_cfg);
3737         adapter->num_vfs = 0;
3738         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3739 }
3740
3741 static void be_clear_queues(struct be_adapter *adapter)
3742 {
3743         be_mcc_queues_destroy(adapter);
3744         be_rx_cqs_destroy(adapter);
3745         be_tx_queues_destroy(adapter);
3746         be_evt_queues_destroy(adapter);
3747 }
3748
3749 static void be_cancel_worker(struct be_adapter *adapter)
3750 {
3751         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3752                 cancel_delayed_work_sync(&adapter->work);
3753                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3754         }
3755 }
3756
3757 static void be_cancel_err_detection(struct be_adapter *adapter)
3758 {
3759         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3760                 cancel_delayed_work_sync(&adapter->be_err_detection_work);
3761                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3762         }
3763 }
3764
3765 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3766 {
3767         struct net_device *netdev = adapter->netdev;
3768
3769         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3770                 be_cmd_manage_iface(adapter, adapter->if_handle,
3771                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3772
3773         if (adapter->vxlan_port)
3774                 be_cmd_set_vxlan_port(adapter, 0);
3775
3776         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3777         adapter->vxlan_port = 0;
3778
3779         netdev->hw_enc_features = 0;
3780         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3781         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3782 }
3783
3784 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3785                                 struct be_resources *vft_res)
3786 {
3787         struct be_resources res = adapter->pool_res;
3788         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3789         struct be_resources res_mod = {0};
3790         u16 num_vf_qs = 1;
3791
3792         /* Distribute the queue resources among the PF and it's VFs */
3793         if (num_vfs) {
3794                 /* Divide the rx queues evenly among the VFs and the PF, capped
3795                  * at VF-EQ-count. Any remainder queues belong to the PF.
3796                  */
3797                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3798                                 res.max_rss_qs / (num_vfs + 1));
3799
3800                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3801                  * RSS Tables per port. Provide RSS on VFs, only if number of
3802                  * VFs requested is less than it's PF Pool's RSS Tables limit.
3803                  */
3804                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3805                         num_vf_qs = 1;
3806         }
3807
3808         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3809          * which are modifiable using SET_PROFILE_CONFIG cmd.
3810          */
3811         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3812                                   RESOURCE_MODIFIABLE, 0);
3813
3814         /* If RSS IFACE capability flags are modifiable for a VF, set the
3815          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3816          * more than 1 RSSQ is available for a VF.
3817          * Otherwise, provision only 1 queue pair for VF.
3818          */
3819         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3820                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3821                 if (num_vf_qs > 1) {
3822                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3823                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3824                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3825                 } else {
3826                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3827                                              BE_IF_FLAGS_DEFQ_RSS);
3828                 }
3829         } else {
3830                 num_vf_qs = 1;
3831         }
3832
3833         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3834                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3835                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3836         }
3837
3838         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3839         vft_res->max_rx_qs = num_vf_qs;
3840         vft_res->max_rss_qs = num_vf_qs;
3841         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3842         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3843
3844         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3845          * among the PF and it's VFs, if the fields are changeable
3846          */
3847         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3848                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3849
3850         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3851                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3852
3853         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3854                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3855
3856         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3857                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
3858 }
3859
3860 static int be_clear(struct be_adapter *adapter)
3861 {
3862         struct pci_dev *pdev = adapter->pdev;
3863         struct  be_resources vft_res = {0};
3864
3865         be_cancel_worker(adapter);
3866
3867         if (sriov_enabled(adapter))
3868                 be_vf_clear(adapter);
3869
3870         /* Re-configure FW to distribute resources evenly across max-supported
3871          * number of VFs, only when VFs are not already enabled.
3872          */
3873         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
3874             !pci_vfs_assigned(pdev)) {
3875                 be_calculate_vf_res(adapter,
3876                                     pci_sriov_get_totalvfs(pdev),
3877                                     &vft_res);
3878                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
3879                                         pci_sriov_get_totalvfs(pdev),
3880                                         &vft_res);
3881         }
3882
3883         be_disable_vxlan_offloads(adapter);
3884         kfree(adapter->pmac_id);
3885         adapter->pmac_id = NULL;
3886
3887         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
3888
3889         be_clear_queues(adapter);
3890
3891         be_msix_disable(adapter);
3892         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
3893         return 0;
3894 }
3895
3896 static int be_vfs_if_create(struct be_adapter *adapter)
3897 {
3898         struct be_resources res = {0};
3899         u32 cap_flags, en_flags, vf;
3900         struct be_vf_cfg *vf_cfg;
3901         int status;
3902
3903         /* If a FW profile exists, then cap_flags are updated */
3904         cap_flags = BE_VF_IF_EN_FLAGS;
3905
3906         for_all_vfs(adapter, vf_cfg, vf) {
3907                 if (!BE3_chip(adapter)) {
3908                         status = be_cmd_get_profile_config(adapter, &res, NULL,
3909                                                            ACTIVE_PROFILE_TYPE,
3910                                                            RESOURCE_LIMITS,
3911                                                            vf + 1);
3912                         if (!status) {
3913                                 cap_flags = res.if_cap_flags;
3914                                 /* Prevent VFs from enabling VLAN promiscuous
3915                                  * mode
3916                                  */
3917                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3918                         }
3919                 }
3920
3921                 /* PF should enable IF flags during proxy if_create call */
3922                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
3923                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
3924                                           &vf_cfg->if_handle, vf + 1);
3925                 if (status)
3926                         return status;
3927         }
3928
3929         return 0;
3930 }
3931
3932 static int be_vf_setup_init(struct be_adapter *adapter)
3933 {
3934         struct be_vf_cfg *vf_cfg;
3935         int vf;
3936
3937         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
3938                                   GFP_KERNEL);
3939         if (!adapter->vf_cfg)
3940                 return -ENOMEM;
3941
3942         for_all_vfs(adapter, vf_cfg, vf) {
3943                 vf_cfg->if_handle = -1;
3944                 vf_cfg->pmac_id = -1;
3945         }
3946         return 0;
3947 }
3948
3949 static int be_vf_setup(struct be_adapter *adapter)
3950 {
3951         struct device *dev = &adapter->pdev->dev;
3952         struct be_vf_cfg *vf_cfg;
3953         int status, old_vfs, vf;
3954         bool spoofchk;
3955
3956         old_vfs = pci_num_vf(adapter->pdev);
3957
3958         status = be_vf_setup_init(adapter);
3959         if (status)
3960                 goto err;
3961
3962         if (old_vfs) {
3963                 for_all_vfs(adapter, vf_cfg, vf) {
3964                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
3965                         if (status)
3966                                 goto err;
3967                 }
3968
3969                 status = be_vfs_mac_query(adapter);
3970                 if (status)
3971                         goto err;
3972         } else {
3973                 status = be_vfs_if_create(adapter);
3974                 if (status)
3975                         goto err;
3976
3977                 status = be_vf_eth_addr_config(adapter);
3978                 if (status)
3979                         goto err;
3980         }
3981
3982         for_all_vfs(adapter, vf_cfg, vf) {
3983                 /* Allow VFs to programs MAC/VLAN filters */
3984                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
3985                                                   vf + 1);
3986                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
3987                         status = be_cmd_set_fn_privileges(adapter,
3988                                                           vf_cfg->privileges |
3989                                                           BE_PRIV_FILTMGMT,
3990                                                           vf + 1);
3991                         if (!status) {
3992                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
3993                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
3994                                          vf);
3995                         }
3996                 }
3997
3998                 /* Allow full available bandwidth */
3999                 if (!old_vfs)
4000                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4001
4002                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4003                                                vf_cfg->if_handle, NULL,
4004                                                &spoofchk);
4005                 if (!status)
4006                         vf_cfg->spoofchk = spoofchk;
4007
4008                 if (!old_vfs) {
4009                         be_cmd_enable_vf(adapter, vf + 1);
4010                         be_cmd_set_logical_link_config(adapter,
4011                                                        IFLA_VF_LINK_STATE_AUTO,
4012                                                        vf+1);
4013                 }
4014         }
4015
4016         if (!old_vfs) {
4017                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4018                 if (status) {
4019                         dev_err(dev, "SRIOV enable failed\n");
4020                         adapter->num_vfs = 0;
4021                         goto err;
4022                 }
4023         }
4024
4025         if (BE3_chip(adapter)) {
4026                 /* On BE3, enable VEB only when SRIOV is enabled */
4027                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4028                                                adapter->if_handle,
4029                                                PORT_FWD_TYPE_VEB, 0);
4030                 if (status)
4031                         goto err;
4032         }
4033
4034         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4035         return 0;
4036 err:
4037         dev_err(dev, "VF setup failed\n");
4038         be_vf_clear(adapter);
4039         return status;
4040 }
4041
4042 /* Converting function_mode bits on BE3 to SH mc_type enums */
4043
4044 static u8 be_convert_mc_type(u32 function_mode)
4045 {
4046         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4047                 return vNIC1;
4048         else if (function_mode & QNQ_MODE)
4049                 return FLEX10;
4050         else if (function_mode & VNIC_MODE)
4051                 return vNIC2;
4052         else if (function_mode & UMC_ENABLED)
4053                 return UMC;
4054         else
4055                 return MC_NONE;
4056 }
4057
4058 /* On BE2/BE3 FW does not suggest the supported limits */
4059 static void BEx_get_resources(struct be_adapter *adapter,
4060                               struct be_resources *res)
4061 {
4062         bool use_sriov = adapter->num_vfs ? 1 : 0;
4063
4064         if (be_physfn(adapter))
4065                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4066         else
4067                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4068
4069         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4070
4071         if (be_is_mc(adapter)) {
4072                 /* Assuming that there are 4 channels per port,
4073                  * when multi-channel is enabled
4074                  */
4075                 if (be_is_qnq_mode(adapter))
4076                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4077                 else
4078                         /* In a non-qnq multichannel mode, the pvid
4079                          * takes up one vlan entry
4080                          */
4081                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4082         } else {
4083                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4084         }
4085
4086         res->max_mcast_mac = BE_MAX_MC;
4087
4088         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4089          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4090          *    *only* if it is RSS-capable.
4091          */
4092         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4093             be_virtfn(adapter) ||
4094             (be_is_mc(adapter) &&
4095              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4096                 res->max_tx_qs = 1;
4097         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4098                 struct be_resources super_nic_res = {0};
4099
4100                 /* On a SuperNIC profile, the driver needs to use the
4101                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4102                  */
4103                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4104                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4105                                           0);
4106                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4107                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4108         } else {
4109                 res->max_tx_qs = BE3_MAX_TX_QS;
4110         }
4111
4112         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4113             !use_sriov && be_physfn(adapter))
4114                 res->max_rss_qs = (adapter->be3_native) ?
4115                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4116         res->max_rx_qs = res->max_rss_qs + 1;
4117
4118         if (be_physfn(adapter))
4119                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4120                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4121         else
4122                 res->max_evt_qs = 1;
4123
4124         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4125         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4126         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4127                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4128 }
4129
4130 static void be_setup_init(struct be_adapter *adapter)
4131 {
4132         adapter->vlan_prio_bmap = 0xff;
4133         adapter->phy.link_speed = -1;
4134         adapter->if_handle = -1;
4135         adapter->be3_native = false;
4136         adapter->if_flags = 0;
4137         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4138         if (be_physfn(adapter))
4139                 adapter->cmd_privileges = MAX_PRIVILEGES;
4140         else
4141                 adapter->cmd_privileges = MIN_PRIVILEGES;
4142 }
4143
4144 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4145  * However, this HW limitation is not exposed to the host via any SLI cmd.
4146  * As a result, in the case of SRIOV and in particular multi-partition configs
4147  * the driver needs to calcuate a proportional share of RSS Tables per PF-pool
4148  * for distribution between the VFs. This self-imposed limit will determine the
4149  * no: of VFs for which RSS can be enabled.
4150  */
4151 void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4152 {
4153         struct be_port_resources port_res = {0};
4154         u8 rss_tables_on_port;
4155         u16 max_vfs = be_max_vfs(adapter);
4156
4157         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4158                                   RESOURCE_LIMITS, 0);
4159
4160         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4161
4162         /* Each PF Pool's RSS Tables limit =
4163          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4164          */
4165         adapter->pool_res.max_rss_tables =
4166                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4167 }
4168
4169 static int be_get_sriov_config(struct be_adapter *adapter)
4170 {
4171         struct be_resources res = {0};
4172         int max_vfs, old_vfs;
4173
4174         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4175                                   RESOURCE_LIMITS, 0);
4176
4177         /* Some old versions of BE3 FW don't report max_vfs value */
4178         if (BE3_chip(adapter) && !res.max_vfs) {
4179                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4180                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4181         }
4182
4183         adapter->pool_res = res;
4184
4185         /* If during previous unload of the driver, the VFs were not disabled,
4186          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4187          * Instead use the TotalVFs value stored in the pci-dev struct.
4188          */
4189         old_vfs = pci_num_vf(adapter->pdev);
4190         if (old_vfs) {
4191                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4192                          old_vfs);
4193
4194                 adapter->pool_res.max_vfs =
4195                         pci_sriov_get_totalvfs(adapter->pdev);
4196                 adapter->num_vfs = old_vfs;
4197         }
4198
4199         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4200                 be_calculate_pf_pool_rss_tables(adapter);
4201                 dev_info(&adapter->pdev->dev,
4202                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4203                          be_max_pf_pool_rss_tables(adapter));
4204         }
4205         return 0;
4206 }
4207
4208 static void be_alloc_sriov_res(struct be_adapter *adapter)
4209 {
4210         int old_vfs = pci_num_vf(adapter->pdev);
4211         struct  be_resources vft_res = {0};
4212         int status;
4213
4214         be_get_sriov_config(adapter);
4215
4216         if (!old_vfs)
4217                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4218
4219         /* When the HW is in SRIOV capable configuration, the PF-pool
4220          * resources are given to PF during driver load, if there are no
4221          * old VFs. This facility is not available in BE3 FW.
4222          * Also, this is done by FW in Lancer chip.
4223          */
4224         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4225                 be_calculate_vf_res(adapter, 0, &vft_res);
4226                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4227                                                  &vft_res);
4228                 if (status)
4229                         dev_err(&adapter->pdev->dev,
4230                                 "Failed to optimize SRIOV resources\n");
4231         }
4232 }
4233
4234 static int be_get_resources(struct be_adapter *adapter)
4235 {
4236         struct device *dev = &adapter->pdev->dev;
4237         struct be_resources res = {0};
4238         int status;
4239
4240         /* For Lancer, SH etc read per-function resource limits from FW.
4241          * GET_FUNC_CONFIG returns per function guaranteed limits.
4242          * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
4243          */
4244         if (BEx_chip(adapter)) {
4245                 BEx_get_resources(adapter, &res);
4246         } else {
4247                 status = be_cmd_get_func_config(adapter, &res);
4248                 if (status)
4249                         return status;
4250
4251                 /* If a deafault RXQ must be created, we'll use up one RSSQ*/
4252                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4253                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4254                         res.max_rss_qs -= 1;
4255         }
4256
4257         /* If RoCE is supported stash away half the EQs for RoCE */
4258         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4259                                 res.max_evt_qs / 2 : res.max_evt_qs;
4260         adapter->res = res;
4261
4262         /* If FW supports RSS default queue, then skip creating non-RSS
4263          * queue for non-IP traffic.
4264          */
4265         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4266                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4267
4268         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4269                  be_max_txqs(adapter), be_max_rxqs(adapter),
4270                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4271                  be_max_vfs(adapter));
4272         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4273                  be_max_uc(adapter), be_max_mc(adapter),
4274                  be_max_vlans(adapter));
4275
4276         /* Ensure RX and TX queues are created in pairs at init time */
4277         adapter->cfg_num_rx_irqs =
4278                                 min_t(u16, netif_get_num_default_rss_queues(),
4279                                       be_max_qp_irqs(adapter));
4280         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4281         return 0;
4282 }
4283
4284 static int be_get_config(struct be_adapter *adapter)
4285 {
4286         int status, level;
4287         u16 profile_id;
4288
4289         status = be_cmd_get_cntl_attributes(adapter);
4290         if (status)
4291                 return status;
4292
4293         status = be_cmd_query_fw_cfg(adapter);
4294         if (status)
4295                 return status;
4296
4297         if (!lancer_chip(adapter) && be_physfn(adapter))
4298                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4299
4300         if (BEx_chip(adapter)) {
4301                 level = be_cmd_get_fw_log_level(adapter);
4302                 adapter->msg_enable =
4303                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4304         }
4305
4306         be_cmd_get_acpi_wol_cap(adapter);
4307         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4308         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4309
4310         be_cmd_query_port_name(adapter);
4311
4312         if (be_physfn(adapter)) {
4313                 status = be_cmd_get_active_profile(adapter, &profile_id);
4314                 if (!status)
4315                         dev_info(&adapter->pdev->dev,
4316                                  "Using profile 0x%x\n", profile_id);
4317         }
4318
4319         return 0;
4320 }
4321
4322 static int be_mac_setup(struct be_adapter *adapter)
4323 {
4324         u8 mac[ETH_ALEN];
4325         int status;
4326
4327         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4328                 status = be_cmd_get_perm_mac(adapter, mac);
4329                 if (status)
4330                         return status;
4331
4332                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4333                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4334         }
4335
4336         return 0;
4337 }
4338
4339 static void be_schedule_worker(struct be_adapter *adapter)
4340 {
4341         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
4342         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4343 }
4344
4345 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4346 {
4347         schedule_delayed_work(&adapter->be_err_detection_work,
4348                               msecs_to_jiffies(delay));
4349         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4350 }
4351
4352 static int be_setup_queues(struct be_adapter *adapter)
4353 {
4354         struct net_device *netdev = adapter->netdev;
4355         int status;
4356
4357         status = be_evt_queues_create(adapter);
4358         if (status)
4359                 goto err;
4360
4361         status = be_tx_qs_create(adapter);
4362         if (status)
4363                 goto err;
4364
4365         status = be_rx_cqs_create(adapter);
4366         if (status)
4367                 goto err;
4368
4369         status = be_mcc_queues_create(adapter);
4370         if (status)
4371                 goto err;
4372
4373         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4374         if (status)
4375                 goto err;
4376
4377         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4378         if (status)
4379                 goto err;
4380
4381         return 0;
4382 err:
4383         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4384         return status;
4385 }
4386
4387 static int be_if_create(struct be_adapter *adapter)
4388 {
4389         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4390         u32 cap_flags = be_if_cap_flags(adapter);
4391         int status;
4392
4393         if (adapter->cfg_num_rx_irqs == 1)
4394                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4395
4396         en_flags &= cap_flags;
4397         /* will enable all the needed filter flags in be_open() */
4398         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4399                                   &adapter->if_handle, 0);
4400
4401         return status;
4402 }
4403
4404 int be_update_queues(struct be_adapter *adapter)
4405 {
4406         struct net_device *netdev = adapter->netdev;
4407         int status;
4408
4409         if (netif_running(netdev))
4410                 be_close(netdev);
4411
4412         be_cancel_worker(adapter);
4413
4414         /* If any vectors have been shared with RoCE we cannot re-program
4415          * the MSIx table.
4416          */
4417         if (!adapter->num_msix_roce_vec)
4418                 be_msix_disable(adapter);
4419
4420         be_clear_queues(adapter);
4421         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4422         if (status)
4423                 return status;
4424
4425         if (!msix_enabled(adapter)) {
4426                 status = be_msix_enable(adapter);
4427                 if (status)
4428                         return status;
4429         }
4430
4431         status = be_if_create(adapter);
4432         if (status)
4433                 return status;
4434
4435         status = be_setup_queues(adapter);
4436         if (status)
4437                 return status;
4438
4439         be_schedule_worker(adapter);
4440
4441         if (netif_running(netdev))
4442                 status = be_open(netdev);
4443
4444         return status;
4445 }
4446
4447 static inline int fw_major_num(const char *fw_ver)
4448 {
4449         int fw_major = 0, i;
4450
4451         i = sscanf(fw_ver, "%d.", &fw_major);
4452         if (i != 1)
4453                 return 0;
4454
4455         return fw_major;
4456 }
4457
4458 /* If any VFs are already enabled don't FLR the PF */
4459 static bool be_reset_required(struct be_adapter *adapter)
4460 {
4461         return pci_num_vf(adapter->pdev) ? false : true;
4462 }
4463
4464 /* Wait for the FW to be ready and perform the required initialization */
4465 static int be_func_init(struct be_adapter *adapter)
4466 {
4467         int status;
4468
4469         status = be_fw_wait_ready(adapter);
4470         if (status)
4471                 return status;
4472
4473         if (be_reset_required(adapter)) {
4474                 status = be_cmd_reset_function(adapter);
4475                 if (status)
4476                         return status;
4477
4478                 /* Wait for interrupts to quiesce after an FLR */
4479                 msleep(100);
4480
4481                 /* We can clear all errors when function reset succeeds */
4482                 be_clear_error(adapter, BE_CLEAR_ALL);
4483         }
4484
4485         /* Tell FW we're ready to fire cmds */
4486         status = be_cmd_fw_init(adapter);
4487         if (status)
4488                 return status;
4489
4490         /* Allow interrupts for other ULPs running on NIC function */
4491         be_intr_set(adapter, true);
4492
4493         return 0;
4494 }
4495
4496 static int be_setup(struct be_adapter *adapter)
4497 {
4498         struct device *dev = &adapter->pdev->dev;
4499         int status;
4500
4501         status = be_func_init(adapter);
4502         if (status)
4503                 return status;
4504
4505         be_setup_init(adapter);
4506
4507         if (!lancer_chip(adapter))
4508                 be_cmd_req_native_mode(adapter);
4509
4510         /* invoke this cmd first to get pf_num and vf_num which are needed
4511          * for issuing profile related cmds
4512          */
4513         if (!BEx_chip(adapter)) {
4514                 status = be_cmd_get_func_config(adapter, NULL);
4515                 if (status)
4516                         return status;
4517         }
4518
4519         status = be_get_config(adapter);
4520         if (status)
4521                 goto err;
4522
4523         if (!BE2_chip(adapter) && be_physfn(adapter))
4524                 be_alloc_sriov_res(adapter);
4525
4526         status = be_get_resources(adapter);
4527         if (status)
4528                 goto err;
4529
4530         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4531                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4532         if (!adapter->pmac_id)
4533                 return -ENOMEM;
4534
4535         status = be_msix_enable(adapter);
4536         if (status)
4537                 goto err;
4538
4539         /* will enable all the needed filter flags in be_open() */
4540         status = be_if_create(adapter);
4541         if (status)
4542                 goto err;
4543
4544         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4545         rtnl_lock();
4546         status = be_setup_queues(adapter);
4547         rtnl_unlock();
4548         if (status)
4549                 goto err;
4550
4551         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4552
4553         status = be_mac_setup(adapter);
4554         if (status)
4555                 goto err;
4556
4557         be_cmd_get_fw_ver(adapter);
4558         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4559
4560         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4561                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4562                         adapter->fw_ver);
4563                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4564         }
4565
4566         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4567                                          adapter->rx_fc);
4568         if (status)
4569                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4570                                         &adapter->rx_fc);
4571
4572         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4573                  adapter->tx_fc, adapter->rx_fc);
4574
4575         if (be_physfn(adapter))
4576                 be_cmd_set_logical_link_config(adapter,
4577                                                IFLA_VF_LINK_STATE_AUTO, 0);
4578
4579         /* BE3 EVB echoes broadcast/multicast packets back to PF's vport
4580          * confusing a linux bridge or OVS that it might be connected to.
4581          * Set the EVB to PASSTHRU mode which effectively disables the EVB
4582          * when SRIOV is not enabled.
4583          */
4584         if (BE3_chip(adapter))
4585                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4586                                       PORT_FWD_TYPE_PASSTHRU, 0);
4587
4588         if (adapter->num_vfs)
4589                 be_vf_setup(adapter);
4590
4591         status = be_cmd_get_phy_info(adapter);
4592         if (!status && be_pause_supported(adapter))
4593                 adapter->phy.fc_autoneg = 1;
4594
4595         be_schedule_worker(adapter);
4596         adapter->flags |= BE_FLAGS_SETUP_DONE;
4597         return 0;
4598 err:
4599         be_clear(adapter);
4600         return status;
4601 }
4602
4603 #ifdef CONFIG_NET_POLL_CONTROLLER
4604 static void be_netpoll(struct net_device *netdev)
4605 {
4606         struct be_adapter *adapter = netdev_priv(netdev);
4607         struct be_eq_obj *eqo;
4608         int i;
4609
4610         for_all_evt_queues(adapter, eqo, i) {
4611                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4612                 napi_schedule(&eqo->napi);
4613         }
4614 }
4615 #endif
4616
4617 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4618 {
4619         const struct firmware *fw;
4620         int status;
4621
4622         if (!netif_running(adapter->netdev)) {
4623                 dev_err(&adapter->pdev->dev,
4624                         "Firmware load not allowed (interface is down)\n");
4625                 return -ENETDOWN;
4626         }
4627
4628         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4629         if (status)
4630                 goto fw_exit;
4631
4632         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4633
4634         if (lancer_chip(adapter))
4635                 status = lancer_fw_download(adapter, fw);
4636         else
4637                 status = be_fw_download(adapter, fw);
4638
4639         if (!status)
4640                 be_cmd_get_fw_ver(adapter);
4641
4642 fw_exit:
4643         release_firmware(fw);
4644         return status;
4645 }
4646
4647 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4648                                  u16 flags)
4649 {
4650         struct be_adapter *adapter = netdev_priv(dev);
4651         struct nlattr *attr, *br_spec;
4652         int rem;
4653         int status = 0;
4654         u16 mode = 0;
4655
4656         if (!sriov_enabled(adapter))
4657                 return -EOPNOTSUPP;
4658
4659         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4660         if (!br_spec)
4661                 return -EINVAL;
4662
4663         nla_for_each_nested(attr, br_spec, rem) {
4664                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4665                         continue;
4666
4667                 if (nla_len(attr) < sizeof(mode))
4668                         return -EINVAL;
4669
4670                 mode = nla_get_u16(attr);
4671                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4672                         return -EOPNOTSUPP;
4673
4674                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4675                         return -EINVAL;
4676
4677                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4678                                                adapter->if_handle,
4679                                                mode == BRIDGE_MODE_VEPA ?
4680                                                PORT_FWD_TYPE_VEPA :
4681                                                PORT_FWD_TYPE_VEB, 0);
4682                 if (status)
4683                         goto err;
4684
4685                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4686                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4687
4688                 return status;
4689         }
4690 err:
4691         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4692                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4693
4694         return status;
4695 }
4696
4697 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4698                                  struct net_device *dev, u32 filter_mask,
4699                                  int nlflags)
4700 {
4701         struct be_adapter *adapter = netdev_priv(dev);
4702         int status = 0;
4703         u8 hsw_mode;
4704
4705         /* BE and Lancer chips support VEB mode only */
4706         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4707                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4708                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4709                         return 0;
4710                 hsw_mode = PORT_FWD_TYPE_VEB;
4711         } else {
4712                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4713                                                adapter->if_handle, &hsw_mode,
4714                                                NULL);
4715                 if (status)
4716                         return 0;
4717
4718                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4719                         return 0;
4720         }
4721
4722         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4723                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4724                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4725                                        0, 0, nlflags, filter_mask, NULL);
4726 }
4727
4728 /* VxLAN offload Notes:
4729  *
4730  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4731  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4732  * is expected to work across all types of IP tunnels once exported. Skyhawk
4733  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4734  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4735  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4736  * those other tunnels are unexported on the fly through ndo_features_check().
4737  *
4738  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4739  * adds more than one port, disable offloads and don't re-enable them again
4740  * until after all the tunnels are removed.
4741  */
4742 static void be_add_vxlan_port(struct net_device *netdev,
4743                               struct udp_tunnel_info *ti)
4744 {
4745         struct be_adapter *adapter = netdev_priv(netdev);
4746         struct device *dev = &adapter->pdev->dev;
4747         __be16 port = ti->port;
4748         int status;
4749
4750         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4751                 return;
4752
4753         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4754                 return;
4755
4756         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4757                 adapter->vxlan_port_aliases++;
4758                 return;
4759         }
4760
4761         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4762                 dev_info(dev,
4763                          "Only one UDP port supported for VxLAN offloads\n");
4764                 dev_info(dev, "Disabling VxLAN offloads\n");
4765                 adapter->vxlan_port_count++;
4766                 goto err;
4767         }
4768
4769         if (adapter->vxlan_port_count++ >= 1)
4770                 return;
4771
4772         status = be_cmd_manage_iface(adapter, adapter->if_handle,
4773                                      OP_CONVERT_NORMAL_TO_TUNNEL);
4774         if (status) {
4775                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4776                 goto err;
4777         }
4778
4779         status = be_cmd_set_vxlan_port(adapter, port);
4780         if (status) {
4781                 dev_warn(dev, "Failed to add VxLAN port\n");
4782                 goto err;
4783         }
4784         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4785         adapter->vxlan_port = port;
4786
4787         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4788                                    NETIF_F_TSO | NETIF_F_TSO6 |
4789                                    NETIF_F_GSO_UDP_TUNNEL;
4790         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4791         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4792
4793         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4794                  be16_to_cpu(port));
4795         return;
4796 err:
4797         be_disable_vxlan_offloads(adapter);
4798 }
4799
4800 static void be_del_vxlan_port(struct net_device *netdev,
4801                               struct udp_tunnel_info *ti)
4802 {
4803         struct be_adapter *adapter = netdev_priv(netdev);
4804         __be16 port = ti->port;
4805
4806         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4807                 return;
4808
4809         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4810                 return;
4811
4812         if (adapter->vxlan_port != port)
4813                 goto done;
4814
4815         if (adapter->vxlan_port_aliases) {
4816                 adapter->vxlan_port_aliases--;
4817                 return;
4818         }
4819
4820         be_disable_vxlan_offloads(adapter);
4821
4822         dev_info(&adapter->pdev->dev,
4823                  "Disabled VxLAN offloads for UDP port %d\n",
4824                  be16_to_cpu(port));
4825 done:
4826         adapter->vxlan_port_count--;
4827 }
4828
4829 static netdev_features_t be_features_check(struct sk_buff *skb,
4830                                            struct net_device *dev,
4831                                            netdev_features_t features)
4832 {
4833         struct be_adapter *adapter = netdev_priv(dev);
4834         u8 l4_hdr = 0;
4835
4836         /* The code below restricts offload features for some tunneled packets.
4837          * Offload features for normal (non tunnel) packets are unchanged.
4838          */
4839         if (!skb->encapsulation ||
4840             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
4841                 return features;
4842
4843         /* It's an encapsulated packet and VxLAN offloads are enabled. We
4844          * should disable tunnel offload features if it's not a VxLAN packet,
4845          * as tunnel offloads have been enabled only for VxLAN. This is done to
4846          * allow other tunneled traffic like GRE work fine while VxLAN
4847          * offloads are configured in Skyhawk-R.
4848          */
4849         switch (vlan_get_protocol(skb)) {
4850         case htons(ETH_P_IP):
4851                 l4_hdr = ip_hdr(skb)->protocol;
4852                 break;
4853         case htons(ETH_P_IPV6):
4854                 l4_hdr = ipv6_hdr(skb)->nexthdr;
4855                 break;
4856         default:
4857                 return features;
4858         }
4859
4860         if (l4_hdr != IPPROTO_UDP ||
4861             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
4862             skb->inner_protocol != htons(ETH_P_TEB) ||
4863             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
4864             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
4865                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4866
4867         return features;
4868 }
4869
4870 static int be_get_phys_port_id(struct net_device *dev,
4871                                struct netdev_phys_item_id *ppid)
4872 {
4873         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
4874         struct be_adapter *adapter = netdev_priv(dev);
4875         u8 *id;
4876
4877         if (MAX_PHYS_ITEM_ID_LEN < id_len)
4878                 return -ENOSPC;
4879
4880         ppid->id[0] = adapter->hba_port_num + 1;
4881         id = &ppid->id[1];
4882         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
4883              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
4884                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
4885
4886         ppid->id_len = id_len;
4887
4888         return 0;
4889 }
4890
4891 static const struct net_device_ops be_netdev_ops = {
4892         .ndo_open               = be_open,
4893         .ndo_stop               = be_close,
4894         .ndo_start_xmit         = be_xmit,
4895         .ndo_set_rx_mode        = be_set_rx_mode,
4896         .ndo_set_mac_address    = be_mac_addr_set,
4897         .ndo_change_mtu         = be_change_mtu,
4898         .ndo_get_stats64        = be_get_stats64,
4899         .ndo_validate_addr      = eth_validate_addr,
4900         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
4901         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
4902         .ndo_set_vf_mac         = be_set_vf_mac,
4903         .ndo_set_vf_vlan        = be_set_vf_vlan,
4904         .ndo_set_vf_rate        = be_set_vf_tx_rate,
4905         .ndo_get_vf_config      = be_get_vf_config,
4906         .ndo_set_vf_link_state  = be_set_vf_link_state,
4907         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
4908 #ifdef CONFIG_NET_POLL_CONTROLLER
4909         .ndo_poll_controller    = be_netpoll,
4910 #endif
4911         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
4912         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
4913 #ifdef CONFIG_NET_RX_BUSY_POLL
4914         .ndo_busy_poll          = be_busy_poll,
4915 #endif
4916         .ndo_udp_tunnel_add     = be_add_vxlan_port,
4917         .ndo_udp_tunnel_del     = be_del_vxlan_port,
4918         .ndo_features_check     = be_features_check,
4919         .ndo_get_phys_port_id   = be_get_phys_port_id,
4920 };
4921
4922 static void be_netdev_init(struct net_device *netdev)
4923 {
4924         struct be_adapter *adapter = netdev_priv(netdev);
4925
4926         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4927                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
4928                 NETIF_F_HW_VLAN_CTAG_TX;
4929         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
4930                 netdev->hw_features |= NETIF_F_RXHASH;
4931
4932         netdev->features |= netdev->hw_features |
4933                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
4934
4935         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4936                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4937
4938         netdev->priv_flags |= IFF_UNICAST_FLT;
4939
4940         netdev->flags |= IFF_MULTICAST;
4941
4942         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
4943
4944         netdev->netdev_ops = &be_netdev_ops;
4945
4946         netdev->ethtool_ops = &be_ethtool_ops;
4947 }
4948
4949 static void be_cleanup(struct be_adapter *adapter)
4950 {
4951         struct net_device *netdev = adapter->netdev;
4952
4953         rtnl_lock();
4954         netif_device_detach(netdev);
4955         if (netif_running(netdev))
4956                 be_close(netdev);
4957         rtnl_unlock();
4958
4959         be_clear(adapter);
4960 }
4961
4962 static int be_resume(struct be_adapter *adapter)
4963 {
4964         struct net_device *netdev = adapter->netdev;
4965         int status;
4966
4967         status = be_setup(adapter);
4968         if (status)
4969                 return status;
4970
4971         rtnl_lock();
4972         if (netif_running(netdev))
4973                 status = be_open(netdev);
4974         rtnl_unlock();
4975
4976         if (status)
4977                 return status;
4978
4979         netif_device_attach(netdev);
4980
4981         return 0;
4982 }
4983
4984 static int be_err_recover(struct be_adapter *adapter)
4985 {
4986         int status;
4987
4988         /* Error recovery is supported only Lancer as of now */
4989         if (!lancer_chip(adapter))
4990                 return -EIO;
4991
4992         /* Wait for adapter to reach quiescent state before
4993          * destroying queues
4994          */
4995         status = be_fw_wait_ready(adapter);
4996         if (status)
4997                 goto err;
4998
4999         be_cleanup(adapter);
5000
5001         status = be_resume(adapter);
5002         if (status)
5003                 goto err;
5004
5005         return 0;
5006 err:
5007         return status;
5008 }
5009
5010 static void be_err_detection_task(struct work_struct *work)
5011 {
5012         struct be_adapter *adapter =
5013                                 container_of(work, struct be_adapter,
5014                                              be_err_detection_work.work);
5015         struct device *dev = &adapter->pdev->dev;
5016         int recovery_status;
5017         int delay = ERR_DETECTION_DELAY;
5018
5019         be_detect_error(adapter);
5020
5021         if (be_check_error(adapter, BE_ERROR_HW))
5022                 recovery_status = be_err_recover(adapter);
5023         else
5024                 goto reschedule_task;
5025
5026         if (!recovery_status) {
5027                 adapter->recovery_retries = 0;
5028                 dev_info(dev, "Adapter recovery successful\n");
5029                 goto reschedule_task;
5030         } else if (be_virtfn(adapter)) {
5031                 /* For VFs, check if PF have allocated resources
5032                  * every second.
5033                  */
5034                 dev_err(dev, "Re-trying adapter recovery\n");
5035                 goto reschedule_task;
5036         } else if (adapter->recovery_retries++ <
5037                    MAX_ERR_RECOVERY_RETRY_COUNT) {
5038                 /* In case of another error during recovery, it takes 30 sec
5039                  * for adapter to come out of error. Retry error recovery after
5040                  * this time interval.
5041                  */
5042                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5043                 delay = ERR_RECOVERY_RETRY_DELAY;
5044                 goto reschedule_task;
5045         } else {
5046                 dev_err(dev, "Adapter recovery failed\n");
5047         }
5048
5049         return;
5050 reschedule_task:
5051         be_schedule_err_detection(adapter, delay);
5052 }
5053
5054 static void be_log_sfp_info(struct be_adapter *adapter)
5055 {
5056         int status;
5057
5058         status = be_cmd_query_sfp_info(adapter);
5059         if (!status) {
5060                 dev_err(&adapter->pdev->dev,
5061                         "Port %c: %s Vendor: %s part no: %s",
5062                         adapter->port_name,
5063                         be_misconfig_evt_port_state[adapter->phy_state],
5064                         adapter->phy.vendor_name,
5065                         adapter->phy.vendor_pn);
5066         }
5067         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5068 }
5069
5070 static void be_worker(struct work_struct *work)
5071 {
5072         struct be_adapter *adapter =
5073                 container_of(work, struct be_adapter, work.work);
5074         struct be_rx_obj *rxo;
5075         int i;
5076
5077         if (be_physfn(adapter) &&
5078             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5079                 be_cmd_get_die_temperature(adapter);
5080
5081         /* when interrupts are not yet enabled, just reap any pending
5082          * mcc completions
5083          */
5084         if (!netif_running(adapter->netdev)) {
5085                 local_bh_disable();
5086                 be_process_mcc(adapter);
5087                 local_bh_enable();
5088                 goto reschedule;
5089         }
5090
5091         if (!adapter->stats_cmd_sent) {
5092                 if (lancer_chip(adapter))
5093                         lancer_cmd_get_pport_stats(adapter,
5094                                                    &adapter->stats_cmd);
5095                 else
5096                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5097         }
5098
5099         for_all_rx_queues(adapter, rxo, i) {
5100                 /* Replenish RX-queues starved due to memory
5101                  * allocation failures.
5102                  */
5103                 if (rxo->rx_post_starved)
5104                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5105         }
5106
5107         /* EQ-delay update for Skyhawk is done while notifying EQ */
5108         if (!skyhawk_chip(adapter))
5109                 be_eqd_update(adapter, false);
5110
5111         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5112                 be_log_sfp_info(adapter);
5113
5114 reschedule:
5115         adapter->work_counter++;
5116         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
5117 }
5118
5119 static void be_unmap_pci_bars(struct be_adapter *adapter)
5120 {
5121         if (adapter->csr)
5122                 pci_iounmap(adapter->pdev, adapter->csr);
5123         if (adapter->db)
5124                 pci_iounmap(adapter->pdev, adapter->db);
5125         if (adapter->pcicfg && adapter->pcicfg_mapped)
5126                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5127 }
5128
5129 static int db_bar(struct be_adapter *adapter)
5130 {
5131         if (lancer_chip(adapter) || be_virtfn(adapter))
5132                 return 0;
5133         else
5134                 return 4;
5135 }
5136
5137 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5138 {
5139         if (skyhawk_chip(adapter)) {
5140                 adapter->roce_db.size = 4096;
5141                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5142                                                               db_bar(adapter));
5143                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5144                                                                db_bar(adapter));
5145         }
5146         return 0;
5147 }
5148
5149 static int be_map_pci_bars(struct be_adapter *adapter)
5150 {
5151         struct pci_dev *pdev = adapter->pdev;
5152         u8 __iomem *addr;
5153         u32 sli_intf;
5154
5155         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5156         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5157                                 SLI_INTF_FAMILY_SHIFT;
5158         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5159
5160         if (BEx_chip(adapter) && be_physfn(adapter)) {
5161                 adapter->csr = pci_iomap(pdev, 2, 0);
5162                 if (!adapter->csr)
5163                         return -ENOMEM;
5164         }
5165
5166         addr = pci_iomap(pdev, db_bar(adapter), 0);
5167         if (!addr)
5168                 goto pci_map_err;
5169         adapter->db = addr;
5170
5171         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5172                 if (be_physfn(adapter)) {
5173                         /* PCICFG is the 2nd BAR in BE2 */
5174                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5175                         if (!addr)
5176                                 goto pci_map_err;
5177                         adapter->pcicfg = addr;
5178                         adapter->pcicfg_mapped = true;
5179                 } else {
5180                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5181                         adapter->pcicfg_mapped = false;
5182                 }
5183         }
5184
5185         be_roce_map_pci_bars(adapter);
5186         return 0;
5187
5188 pci_map_err:
5189         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5190         be_unmap_pci_bars(adapter);
5191         return -ENOMEM;
5192 }
5193
5194 static void be_drv_cleanup(struct be_adapter *adapter)
5195 {
5196         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5197         struct device *dev = &adapter->pdev->dev;
5198
5199         if (mem->va)
5200                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5201
5202         mem = &adapter->rx_filter;
5203         if (mem->va)
5204                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5205
5206         mem = &adapter->stats_cmd;
5207         if (mem->va)
5208                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5209 }
5210
5211 /* Allocate and initialize various fields in be_adapter struct */
5212 static int be_drv_init(struct be_adapter *adapter)
5213 {
5214         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5215         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5216         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5217         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5218         struct device *dev = &adapter->pdev->dev;
5219         int status = 0;
5220
5221         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5222         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5223                                                  &mbox_mem_alloc->dma,
5224                                                  GFP_KERNEL);
5225         if (!mbox_mem_alloc->va)
5226                 return -ENOMEM;
5227
5228         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5229         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5230         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5231
5232         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5233         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5234                                             &rx_filter->dma, GFP_KERNEL);
5235         if (!rx_filter->va) {
5236                 status = -ENOMEM;
5237                 goto free_mbox;
5238         }
5239
5240         if (lancer_chip(adapter))
5241                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5242         else if (BE2_chip(adapter))
5243                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5244         else if (BE3_chip(adapter))
5245                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5246         else
5247                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5248         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5249                                             &stats_cmd->dma, GFP_KERNEL);
5250         if (!stats_cmd->va) {
5251                 status = -ENOMEM;
5252                 goto free_rx_filter;
5253         }
5254
5255         mutex_init(&adapter->mbox_lock);
5256         spin_lock_init(&adapter->mcc_lock);
5257         spin_lock_init(&adapter->mcc_cq_lock);
5258         init_completion(&adapter->et_cmd_compl);
5259
5260         pci_save_state(adapter->pdev);
5261
5262         INIT_DELAYED_WORK(&adapter->work, be_worker);
5263         INIT_DELAYED_WORK(&adapter->be_err_detection_work,
5264                           be_err_detection_task);
5265
5266         adapter->rx_fc = true;
5267         adapter->tx_fc = true;
5268
5269         /* Must be a power of 2 or else MODULO will BUG_ON */
5270         adapter->be_get_temp_freq = 64;
5271
5272         return 0;
5273
5274 free_rx_filter:
5275         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5276 free_mbox:
5277         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5278                           mbox_mem_alloc->dma);
5279         return status;
5280 }
5281
5282 static void be_remove(struct pci_dev *pdev)
5283 {
5284         struct be_adapter *adapter = pci_get_drvdata(pdev);
5285
5286         if (!adapter)
5287                 return;
5288
5289         be_roce_dev_remove(adapter);
5290         be_intr_set(adapter, false);
5291
5292         be_cancel_err_detection(adapter);
5293
5294         unregister_netdev(adapter->netdev);
5295
5296         be_clear(adapter);
5297
5298         /* tell fw we're done with firing cmds */
5299         be_cmd_fw_clean(adapter);
5300
5301         be_unmap_pci_bars(adapter);
5302         be_drv_cleanup(adapter);
5303
5304         pci_disable_pcie_error_reporting(pdev);
5305
5306         pci_release_regions(pdev);
5307         pci_disable_device(pdev);
5308
5309         free_netdev(adapter->netdev);
5310 }
5311
5312 static ssize_t be_hwmon_show_temp(struct device *dev,
5313                                   struct device_attribute *dev_attr,
5314                                   char *buf)
5315 {
5316         struct be_adapter *adapter = dev_get_drvdata(dev);
5317
5318         /* Unit: millidegree Celsius */
5319         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5320                 return -EIO;
5321         else
5322                 return sprintf(buf, "%u\n",
5323                                adapter->hwmon_info.be_on_die_temp * 1000);
5324 }
5325
5326 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5327                           be_hwmon_show_temp, NULL, 1);
5328
5329 static struct attribute *be_hwmon_attrs[] = {
5330         &sensor_dev_attr_temp1_input.dev_attr.attr,
5331         NULL
5332 };
5333
5334 ATTRIBUTE_GROUPS(be_hwmon);
5335
5336 static char *mc_name(struct be_adapter *adapter)
5337 {
5338         char *str = ""; /* default */
5339
5340         switch (adapter->mc_type) {
5341         case UMC:
5342                 str = "UMC";
5343                 break;
5344         case FLEX10:
5345                 str = "FLEX10";
5346                 break;
5347         case vNIC1:
5348                 str = "vNIC-1";
5349                 break;
5350         case nPAR:
5351                 str = "nPAR";
5352                 break;
5353         case UFP:
5354                 str = "UFP";
5355                 break;
5356         case vNIC2:
5357                 str = "vNIC-2";
5358                 break;
5359         default:
5360                 str = "";
5361         }
5362
5363         return str;
5364 }
5365
5366 static inline char *func_name(struct be_adapter *adapter)
5367 {
5368         return be_physfn(adapter) ? "PF" : "VF";
5369 }
5370
5371 static inline char *nic_name(struct pci_dev *pdev)
5372 {
5373         switch (pdev->device) {
5374         case OC_DEVICE_ID1:
5375                 return OC_NAME;
5376         case OC_DEVICE_ID2:
5377                 return OC_NAME_BE;
5378         case OC_DEVICE_ID3:
5379         case OC_DEVICE_ID4:
5380                 return OC_NAME_LANCER;
5381         case BE_DEVICE_ID2:
5382                 return BE3_NAME;
5383         case OC_DEVICE_ID5:
5384         case OC_DEVICE_ID6:
5385                 return OC_NAME_SH;
5386         default:
5387                 return BE_NAME;
5388         }
5389 }
5390
5391 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5392 {
5393         struct be_adapter *adapter;
5394         struct net_device *netdev;
5395         int status = 0;
5396
5397         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5398
5399         status = pci_enable_device(pdev);
5400         if (status)
5401                 goto do_none;
5402
5403         status = pci_request_regions(pdev, DRV_NAME);
5404         if (status)
5405                 goto disable_dev;
5406         pci_set_master(pdev);
5407
5408         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5409         if (!netdev) {
5410                 status = -ENOMEM;
5411                 goto rel_reg;
5412         }
5413         adapter = netdev_priv(netdev);
5414         adapter->pdev = pdev;
5415         pci_set_drvdata(pdev, adapter);
5416         adapter->netdev = netdev;
5417         SET_NETDEV_DEV(netdev, &pdev->dev);
5418
5419         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5420         if (!status) {
5421                 netdev->features |= NETIF_F_HIGHDMA;
5422         } else {
5423                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5424                 if (status) {
5425                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5426                         goto free_netdev;
5427                 }
5428         }
5429
5430         status = pci_enable_pcie_error_reporting(pdev);
5431         if (!status)
5432                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5433
5434         status = be_map_pci_bars(adapter);
5435         if (status)
5436                 goto free_netdev;
5437
5438         status = be_drv_init(adapter);
5439         if (status)
5440                 goto unmap_bars;
5441
5442         status = be_setup(adapter);
5443         if (status)
5444                 goto drv_cleanup;
5445
5446         be_netdev_init(netdev);
5447         status = register_netdev(netdev);
5448         if (status != 0)
5449                 goto unsetup;
5450
5451         be_roce_dev_add(adapter);
5452
5453         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5454
5455         /* On Die temperature not supported for VF. */
5456         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5457                 adapter->hwmon_info.hwmon_dev =
5458                         devm_hwmon_device_register_with_groups(&pdev->dev,
5459                                                                DRV_NAME,
5460                                                                adapter,
5461                                                                be_hwmon_groups);
5462                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5463         }
5464
5465         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5466                  func_name(adapter), mc_name(adapter), adapter->port_name);
5467
5468         return 0;
5469
5470 unsetup:
5471         be_clear(adapter);
5472 drv_cleanup:
5473         be_drv_cleanup(adapter);
5474 unmap_bars:
5475         be_unmap_pci_bars(adapter);
5476 free_netdev:
5477         free_netdev(netdev);
5478 rel_reg:
5479         pci_release_regions(pdev);
5480 disable_dev:
5481         pci_disable_device(pdev);
5482 do_none:
5483         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5484         return status;
5485 }
5486
5487 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5488 {
5489         struct be_adapter *adapter = pci_get_drvdata(pdev);
5490
5491         be_intr_set(adapter, false);
5492         be_cancel_err_detection(adapter);
5493
5494         be_cleanup(adapter);
5495
5496         pci_save_state(pdev);
5497         pci_disable_device(pdev);
5498         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5499         return 0;
5500 }
5501
5502 static int be_pci_resume(struct pci_dev *pdev)
5503 {
5504         struct be_adapter *adapter = pci_get_drvdata(pdev);
5505         int status = 0;
5506
5507         status = pci_enable_device(pdev);
5508         if (status)
5509                 return status;
5510
5511         pci_restore_state(pdev);
5512
5513         status = be_resume(adapter);
5514         if (status)
5515                 return status;
5516
5517         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5518
5519         return 0;
5520 }
5521
5522 /*
5523  * An FLR will stop BE from DMAing any data.
5524  */
5525 static void be_shutdown(struct pci_dev *pdev)
5526 {
5527         struct be_adapter *adapter = pci_get_drvdata(pdev);
5528
5529         if (!adapter)
5530                 return;
5531
5532         be_roce_dev_shutdown(adapter);
5533         cancel_delayed_work_sync(&adapter->work);
5534         be_cancel_err_detection(adapter);
5535
5536         netif_device_detach(adapter->netdev);
5537
5538         be_cmd_reset_function(adapter);
5539
5540         pci_disable_device(pdev);
5541 }
5542
5543 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5544                                             pci_channel_state_t state)
5545 {
5546         struct be_adapter *adapter = pci_get_drvdata(pdev);
5547
5548         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5549
5550         be_roce_dev_remove(adapter);
5551
5552         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5553                 be_set_error(adapter, BE_ERROR_EEH);
5554
5555                 be_cancel_err_detection(adapter);
5556
5557                 be_cleanup(adapter);
5558         }
5559
5560         if (state == pci_channel_io_perm_failure)
5561                 return PCI_ERS_RESULT_DISCONNECT;
5562
5563         pci_disable_device(pdev);
5564
5565         /* The error could cause the FW to trigger a flash debug dump.
5566          * Resetting the card while flash dump is in progress
5567          * can cause it not to recover; wait for it to finish.
5568          * Wait only for first function as it is needed only once per
5569          * adapter.
5570          */
5571         if (pdev->devfn == 0)
5572                 ssleep(30);
5573
5574         return PCI_ERS_RESULT_NEED_RESET;
5575 }
5576
5577 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5578 {
5579         struct be_adapter *adapter = pci_get_drvdata(pdev);
5580         int status;
5581
5582         dev_info(&adapter->pdev->dev, "EEH reset\n");
5583
5584         status = pci_enable_device(pdev);
5585         if (status)
5586                 return PCI_ERS_RESULT_DISCONNECT;
5587
5588         pci_set_master(pdev);
5589         pci_restore_state(pdev);
5590
5591         /* Check if card is ok and fw is ready */
5592         dev_info(&adapter->pdev->dev,
5593                  "Waiting for FW to be ready after EEH reset\n");
5594         status = be_fw_wait_ready(adapter);
5595         if (status)
5596                 return PCI_ERS_RESULT_DISCONNECT;
5597
5598         pci_cleanup_aer_uncorrect_error_status(pdev);
5599         be_clear_error(adapter, BE_CLEAR_ALL);
5600         return PCI_ERS_RESULT_RECOVERED;
5601 }
5602
5603 static void be_eeh_resume(struct pci_dev *pdev)
5604 {
5605         int status = 0;
5606         struct be_adapter *adapter = pci_get_drvdata(pdev);
5607
5608         dev_info(&adapter->pdev->dev, "EEH resume\n");
5609
5610         pci_save_state(pdev);
5611
5612         status = be_resume(adapter);
5613         if (status)
5614                 goto err;
5615
5616         be_roce_dev_add(adapter);
5617
5618         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5619         return;
5620 err:
5621         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
5622 }
5623
5624 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
5625 {
5626         struct be_adapter *adapter = pci_get_drvdata(pdev);
5627         struct be_resources vft_res = {0};
5628         int status;
5629
5630         if (!num_vfs)
5631                 be_vf_clear(adapter);
5632
5633         adapter->num_vfs = num_vfs;
5634
5635         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
5636                 dev_warn(&pdev->dev,
5637                          "Cannot disable VFs while they are assigned\n");
5638                 return -EBUSY;
5639         }
5640
5641         /* When the HW is in SRIOV capable configuration, the PF-pool resources
5642          * are equally distributed across the max-number of VFs. The user may
5643          * request only a subset of the max-vfs to be enabled.
5644          * Based on num_vfs, redistribute the resources across num_vfs so that
5645          * each VF will have access to more number of resources.
5646          * This facility is not available in BE3 FW.
5647          * Also, this is done by FW in Lancer chip.
5648          */
5649         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
5650                 be_calculate_vf_res(adapter, adapter->num_vfs,
5651                                     &vft_res);
5652                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
5653                                                  adapter->num_vfs, &vft_res);
5654                 if (status)
5655                         dev_err(&pdev->dev,
5656                                 "Failed to optimize SR-IOV resources\n");
5657         }
5658
5659         status = be_get_resources(adapter);
5660         if (status)
5661                 return be_cmd_status(status);
5662
5663         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
5664         rtnl_lock();
5665         status = be_update_queues(adapter);
5666         rtnl_unlock();
5667         if (status)
5668                 return be_cmd_status(status);
5669
5670         if (adapter->num_vfs)
5671                 status = be_vf_setup(adapter);
5672
5673         if (!status)
5674                 return adapter->num_vfs;
5675
5676         return 0;
5677 }
5678
5679 static const struct pci_error_handlers be_eeh_handlers = {
5680         .error_detected = be_eeh_err_detected,
5681         .slot_reset = be_eeh_reset,
5682         .resume = be_eeh_resume,
5683 };
5684
5685 static struct pci_driver be_driver = {
5686         .name = DRV_NAME,
5687         .id_table = be_dev_ids,
5688         .probe = be_probe,
5689         .remove = be_remove,
5690         .suspend = be_suspend,
5691         .resume = be_pci_resume,
5692         .shutdown = be_shutdown,
5693         .sriov_configure = be_pci_sriov_configure,
5694         .err_handler = &be_eeh_handlers
5695 };
5696
5697 static int __init be_init_module(void)
5698 {
5699         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
5700             rx_frag_size != 2048) {
5701                 printk(KERN_WARNING DRV_NAME
5702                         " : Module param rx_frag_size must be 2048/4096/8192."
5703                         " Using 2048\n");
5704                 rx_frag_size = 2048;
5705         }
5706
5707         if (num_vfs > 0) {
5708                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
5709                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
5710         }
5711
5712         return pci_register_driver(&be_driver);
5713 }
5714 module_init(be_init_module);
5715
5716 static void __exit be_exit_module(void)
5717 {
5718         pci_unregister_driver(&be_driver);
5719 }
5720 module_exit(be_exit_module);