be2net: clear vlan-promisc setting before programming the vlan list
drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 static const struct pci_device_id be_dev_ids[] = {
45         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
49         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
50         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
51         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
52         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
53         { 0 }
54 };
55 MODULE_DEVICE_TABLE(pci, be_dev_ids);
56 /* UE Status Low CSR */
57 static const char * const ue_status_low_desc[] = {
58         "CEV",
59         "CTX",
60         "DBUF",
61         "ERX",
62         "Host",
63         "MPU",
64         "NDMA",
65         "PTC ",
66         "RDMA ",
67         "RXF ",
68         "RXIPS ",
69         "RXULP0 ",
70         "RXULP1 ",
71         "RXULP2 ",
72         "TIM ",
73         "TPOST ",
74         "TPRE ",
75         "TXIPS ",
76         "TXULP0 ",
77         "TXULP1 ",
78         "UC ",
79         "WDMA ",
80         "TXULP2 ",
81         "HOST1 ",
82         "P0_OB_LINK ",
83         "P1_OB_LINK ",
84         "HOST_GPIO ",
85         "MBOX ",
86         "ERX2 ",
87         "SPARE ",
88         "JTAG ",
89         "MPU_INTPEND "
90 };
91
92 /* UE Status High CSR */
93 static const char * const ue_status_hi_desc[] = {
94         "LPCMEMHOST",
95         "MGMT_MAC",
96         "PCS0ONLINE",
97         "MPU_IRAM",
98         "PCS1ONLINE",
99         "PCTL0",
100         "PCTL1",
101         "PMEM",
102         "RR",
103         "TXPB",
104         "RXPP",
105         "XAUI",
106         "TXP",
107         "ARM",
108         "IPC",
109         "HOST2",
110         "HOST3",
111         "HOST4",
112         "HOST5",
113         "HOST6",
114         "HOST7",
115         "ECRC",
116         "Poison TLP",
117         "NETC",
118         "PERIPH",
119         "LLTXULP",
120         "D2P",
121         "RCON",
122         "LDMA",
123         "LLTXP",
124         "LLTXPB",
125         "Unknown"
126 };
127
128 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
129                                  BE_IF_FLAGS_BROADCAST | \
130                                  BE_IF_FLAGS_MULTICAST | \
131                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
132
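/* Free a queue's DMA-coherent descriptor-ring memory, if it was allocated */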
133 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
134 {
135         struct be_dma_mem *mem = &q->dma_mem;
136
137         if (mem->va) {
138                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
139                                   mem->dma);
140                 mem->va = NULL;
141         }
142 }
143
144 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
145                           u16 len, u16 entry_size)
146 {
147         struct be_dma_mem *mem = &q->dma_mem;
148
149         memset(q, 0, sizeof(*q));
150         q->len = len;
151         q->entry_size = entry_size;
152         mem->size = len * entry_size;
153         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
154                                       GFP_KERNEL);
155         if (!mem->va)
156                 return -ENOMEM;
157         return 0;
158 }
159
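/* Enable/disable host interrupts by toggling the HOSTINTR bit of the
 * MEMBAR interrupt-control register through PCI config space.
 */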
160 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
161 {
162         u32 reg, enabled;
163
164         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
165                               &reg);
166         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
167
168         if (!enabled && enable)
169                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
170         else if (enabled && !enable)
171                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
172         else
173                 return;
174
175         pci_write_config_dword(adapter->pdev,
176                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
177 }
178
179 static void be_intr_set(struct be_adapter *adapter, bool enable)
180 {
181         int status = 0;
182
183         /* On Lancer, interrupts can't be controlled via this register */
184         if (lancer_chip(adapter))
185                 return;
186
187         if (be_check_error(adapter, BE_ERROR_EEH))
188                 return;
189
190         status = be_cmd_intr_set(adapter, enable);
191         if (status)
192                 be_reg_intr_set(adapter, enable);
193 }
194
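/* Ring the RX-queue doorbell to tell HW how many RX buffers were posted */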
195 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
196 {
197         u32 val = 0;
198
199         if (be_check_error(adapter, BE_ERROR_HW))
200                 return;
201
202         val |= qid & DB_RQ_RING_ID_MASK;
203         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
204
205         wmb();
206         iowrite32(val, adapter->db + DB_RQ_OFFSET);
207 }
208
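/* Ring the TX-queue doorbell to tell HW how many WRBs were posted */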
209 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
210                           u16 posted)
211 {
212         u32 val = 0;
213
214         if (be_check_error(adapter, BE_ERROR_HW))
215                 return;
216
217         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
218         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
219
220         wmb();
221         iowrite32(val, adapter->db + txo->db_offset);
222 }
223
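/* Ring the event-queue doorbell: optionally re-arm the EQ and clear the
 * interrupt, ack num_popped event entries and program the delay multiplier.
 */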
224 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
225                          bool arm, bool clear_int, u16 num_popped,
226                          u32 eq_delay_mult_enc)
227 {
228         u32 val = 0;
229
230         val |= qid & DB_EQ_RING_ID_MASK;
231         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
232
233         if (be_check_error(adapter, BE_ERROR_HW))
234                 return;
235
236         if (arm)
237                 val |= 1 << DB_EQ_REARM_SHIFT;
238         if (clear_int)
239                 val |= 1 << DB_EQ_CLR_SHIFT;
240         val |= 1 << DB_EQ_EVNT_SHIFT;
241         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
242         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
243         iowrite32(val, adapter->db + DB_EQ_OFFSET);
244 }
245
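/* Ring the completion-queue doorbell: optionally re-arm the CQ and
 * ack num_popped completion entries.
 */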
246 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
247 {
248         u32 val = 0;
249
250         val |= qid & DB_CQ_RING_ID_MASK;
251         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
252                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
253
254         if (be_check_error(adapter, BE_ERROR_HW))
255                 return;
256
257         if (arm)
258                 val |= 1 << DB_CQ_REARM_SHIFT;
259         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
260         iowrite32(val, adapter->db + DB_CQ_OFFSET);
261 }
262
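/* ndo_set_mac_address handler: program the new MAC via PMAC_ADD, query the
 * FW for the currently active MAC and only then update netdev->dev_addr.
 */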
263 static int be_mac_addr_set(struct net_device *netdev, void *p)
264 {
265         struct be_adapter *adapter = netdev_priv(netdev);
266         struct device *dev = &adapter->pdev->dev;
267         struct sockaddr *addr = p;
268         int status;
269         u8 mac[ETH_ALEN];
270         u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
271
272         if (!is_valid_ether_addr(addr->sa_data))
273                 return -EADDRNOTAVAIL;
274
275         /* Proceed further only if the user-provided MAC is different
276          * from the active MAC
277          */
278         if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
279                 return 0;
280
281         /* if device is not running, copy MAC to netdev->dev_addr */
282         if (!netif_running(netdev))
283                 goto done;
284
285         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
286          * privilege or if the PF did not provision the new MAC address.
287          * On BE3, this cmd will always fail if the VF doesn't have the
288          * FILTMGMT privilege. This failure is OK only if the PF programmed
289          * the MAC for the VF.
290          */
291         status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
292                                  adapter->if_handle, &adapter->pmac_id[0], 0);
293         if (!status) {
294                 curr_pmac_id = adapter->pmac_id[0];
295
296                 /* Delete the old programmed MAC. This call may fail if the
297                  * old MAC was already deleted by the PF driver.
298                  */
299                 if (adapter->pmac_id[0] != old_pmac_id)
300                         be_cmd_pmac_del(adapter, adapter->if_handle,
301                                         old_pmac_id, 0);
302         }
303
304         /* Decide if the new MAC is successfully activated only after
305          * querying the FW
306          */
307         status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
308                                        adapter->if_handle, true, 0);
309         if (status)
310                 goto err;
311
312         /* The MAC change did not happen, either due to lack of privilege
313          * or because the PF didn't pre-provision the MAC.
314          */
315         if (!ether_addr_equal(addr->sa_data, mac)) {
316                 status = -EPERM;
317                 goto err;
318         }
319 done:
320         ether_addr_copy(netdev->dev_addr, addr->sa_data);
321         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
322         return 0;
323 err:
324         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
325         return status;
326 }
327
328 /* BE2 supports only v0 cmd */
329 static void *hw_stats_from_cmd(struct be_adapter *adapter)
330 {
331         if (BE2_chip(adapter)) {
332                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
333
334                 return &cmd->hw_stats;
335         } else if (BE3_chip(adapter)) {
336                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
337
338                 return &cmd->hw_stats;
339         } else {
340                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
341
342                 return &cmd->hw_stats;
343         }
344 }
345
346 /* BE2 supports only v0 cmd */
347 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
348 {
349         if (BE2_chip(adapter)) {
350                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
351
352                 return &hw_stats->erx;
353         } else if (BE3_chip(adapter)) {
354                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
355
356                 return &hw_stats->erx;
357         } else {
358                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
359
360                 return &hw_stats->erx;
361         }
362 }
363
364 static void populate_be_v0_stats(struct be_adapter *adapter)
365 {
366         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
367         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
368         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
369         struct be_port_rxf_stats_v0 *port_stats =
370                                         &rxf_stats->port[adapter->port_num];
371         struct be_drv_stats *drvs = &adapter->drv_stats;
372
373         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
374         drvs->rx_pause_frames = port_stats->rx_pause_frames;
375         drvs->rx_crc_errors = port_stats->rx_crc_errors;
376         drvs->rx_control_frames = port_stats->rx_control_frames;
377         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
378         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
379         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
380         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
381         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
382         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
383         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
384         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
385         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
386         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
387         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
388         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
389         drvs->rx_dropped_header_too_small =
390                 port_stats->rx_dropped_header_too_small;
391         drvs->rx_address_filtered =
392                                         port_stats->rx_address_filtered +
393                                         port_stats->rx_vlan_filtered;
394         drvs->rx_alignment_symbol_errors =
395                 port_stats->rx_alignment_symbol_errors;
396
397         drvs->tx_pauseframes = port_stats->tx_pauseframes;
398         drvs->tx_controlframes = port_stats->tx_controlframes;
399
400         if (adapter->port_num)
401                 drvs->jabber_events = rxf_stats->port1_jabber_events;
402         else
403                 drvs->jabber_events = rxf_stats->port0_jabber_events;
404         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
405         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
406         drvs->forwarded_packets = rxf_stats->forwarded_packets;
407         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
408         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
409         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
410         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
411 }
412
413 static void populate_be_v1_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v1 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
424         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
425         drvs->rx_pause_frames = port_stats->rx_pause_frames;
426         drvs->rx_crc_errors = port_stats->rx_crc_errors;
427         drvs->rx_control_frames = port_stats->rx_control_frames;
428         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
429         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
430         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
431         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
432         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
433         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
434         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
435         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
436         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
437         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_input_fifo_overflow_drop =
441                 port_stats->rx_input_fifo_overflow_drop;
442         drvs->rx_address_filtered = port_stats->rx_address_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
449         drvs->jabber_events = port_stats->jabber_events;
450         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
451         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
452         drvs->forwarded_packets = rxf_stats->forwarded_packets;
453         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
454         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
455         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
456         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
457 }
458
459 static void populate_be_v2_stats(struct be_adapter *adapter)
460 {
461         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
462         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
463         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
464         struct be_port_rxf_stats_v2 *port_stats =
465                                         &rxf_stats->port[adapter->port_num];
466         struct be_drv_stats *drvs = &adapter->drv_stats;
467
468         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
469         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
470         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
471         drvs->rx_pause_frames = port_stats->rx_pause_frames;
472         drvs->rx_crc_errors = port_stats->rx_crc_errors;
473         drvs->rx_control_frames = port_stats->rx_control_frames;
474         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
475         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
476         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
477         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
478         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
479         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
480         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
481         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
482         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
483         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
484         drvs->rx_dropped_header_too_small =
485                 port_stats->rx_dropped_header_too_small;
486         drvs->rx_input_fifo_overflow_drop =
487                 port_stats->rx_input_fifo_overflow_drop;
488         drvs->rx_address_filtered = port_stats->rx_address_filtered;
489         drvs->rx_alignment_symbol_errors =
490                 port_stats->rx_alignment_symbol_errors;
491         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
492         drvs->tx_pauseframes = port_stats->tx_pauseframes;
493         drvs->tx_controlframes = port_stats->tx_controlframes;
494         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
495         drvs->jabber_events = port_stats->jabber_events;
496         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
497         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
498         drvs->forwarded_packets = rxf_stats->forwarded_packets;
499         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
500         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
501         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
502         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
503         if (be_roce_supported(adapter)) {
504                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
505                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
506                 drvs->rx_roce_frames = port_stats->roce_frames_received;
507                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
508                 drvs->roce_drops_payload_len =
509                         port_stats->roce_drops_payload_len;
510         }
511 }
512
513 static void populate_lancer_stats(struct be_adapter *adapter)
514 {
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
517
518         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
519         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
520         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
521         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
522         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
523         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
524         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
525         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
526         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
527         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
528         drvs->rx_dropped_tcp_length =
529                                 pport_stats->rx_dropped_invalid_tcp_length;
530         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
533         drvs->rx_dropped_header_too_small =
534                                 pport_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
536         drvs->rx_address_filtered =
537                                         pport_stats->rx_address_filtered +
538                                         pport_stats->rx_vlan_filtered;
539         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
540         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
541         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
542         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
543         drvs->jabber_events = pport_stats->rx_jabbers;
544         drvs->forwarded_packets = pport_stats->num_forwards_lo;
545         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
546         drvs->rx_drops_too_many_frags =
547                                 pport_stats->rx_drops_too_many_frags_lo;
548 }
549
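/* Fold a 16-bit HW counter that wraps at 65535 into a 32-bit accumulator
 * maintained by the driver.
 */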
550 static void accumulate_16bit_val(u32 *acc, u16 val)
551 {
552 #define lo(x)                   (x & 0xFFFF)
553 #define hi(x)                   (x & 0xFFFF0000)
554         bool wrapped = val < lo(*acc);
555         u32 newacc = hi(*acc) + val;
556
557         if (wrapped)
558                 newacc += 65536;
559         ACCESS_ONCE(*acc) = newacc;
560 }
561
562 static void populate_erx_stats(struct be_adapter *adapter,
563                                struct be_rx_obj *rxo, u32 erx_stat)
564 {
565         if (!BEx_chip(adapter))
566                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
567         else
568                 /* The erx HW counter below can wrap around after 65535;
569                  * the driver accumulates it into a 32-bit value
570                  */
571                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
572                                      (u16)erx_stat);
573 }
574
575 void be_parse_stats(struct be_adapter *adapter)
576 {
577         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
578         struct be_rx_obj *rxo;
579         int i;
580         u32 erx_stat;
581
582         if (lancer_chip(adapter)) {
583                 populate_lancer_stats(adapter);
584         } else {
585                 if (BE2_chip(adapter))
586                         populate_be_v0_stats(adapter);
587                 else if (BE3_chip(adapter))
588                         /* for BE3 */
589                         populate_be_v1_stats(adapter);
590                 else
591                         populate_be_v2_stats(adapter);
592
593                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
594                 for_all_rx_queues(adapter, rxo, i) {
595                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
596                         populate_erx_stats(adapter, rxo, erx_stat);
597                 }
598         }
599 }
600
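/* ndo_get_stats64 handler: sum the per-queue SW packet/byte counters (under
 * u64_stats sync) and fill in the HW error counters from drv_stats.
 */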
601 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
602                                                 struct rtnl_link_stats64 *stats)
603 {
604         struct be_adapter *adapter = netdev_priv(netdev);
605         struct be_drv_stats *drvs = &adapter->drv_stats;
606         struct be_rx_obj *rxo;
607         struct be_tx_obj *txo;
608         u64 pkts, bytes;
609         unsigned int start;
610         int i;
611
612         for_all_rx_queues(adapter, rxo, i) {
613                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
614
615                 do {
616                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
617                         pkts = rx_stats(rxo)->rx_pkts;
618                         bytes = rx_stats(rxo)->rx_bytes;
619                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
620                 stats->rx_packets += pkts;
621                 stats->rx_bytes += bytes;
622                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
623                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
624                                         rx_stats(rxo)->rx_drops_no_frags;
625         }
626
627         for_all_tx_queues(adapter, txo, i) {
628                 const struct be_tx_stats *tx_stats = tx_stats(txo);
629
630                 do {
631                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
632                         pkts = tx_stats(txo)->tx_pkts;
633                         bytes = tx_stats(txo)->tx_bytes;
634                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
635                 stats->tx_packets += pkts;
636                 stats->tx_bytes += bytes;
637         }
638
639         /* bad pkts received */
640         stats->rx_errors = drvs->rx_crc_errors +
641                 drvs->rx_alignment_symbol_errors +
642                 drvs->rx_in_range_errors +
643                 drvs->rx_out_range_errors +
644                 drvs->rx_frame_too_long +
645                 drvs->rx_dropped_too_small +
646                 drvs->rx_dropped_too_short +
647                 drvs->rx_dropped_header_too_small +
648                 drvs->rx_dropped_tcp_length +
649                 drvs->rx_dropped_runt;
650
651         /* detailed rx errors */
652         stats->rx_length_errors = drvs->rx_in_range_errors +
653                 drvs->rx_out_range_errors +
654                 drvs->rx_frame_too_long;
655
656         stats->rx_crc_errors = drvs->rx_crc_errors;
657
658         /* frame alignment errors */
659         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
660
661         /* receiver fifo overrun */
662         /* drops_no_pbuf is not per i/f, it's per BE card */
663         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
664                                 drvs->rx_input_fifo_overflow_drop +
665                                 drvs->rx_drops_no_pbuf;
666         return stats;
667 }
668
669 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
670 {
671         struct net_device *netdev = adapter->netdev;
672
673         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
674                 netif_carrier_off(netdev);
675                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
676         }
677
678         if (link_status)
679                 netif_carrier_on(netdev);
680         else
681                 netif_carrier_off(netdev);
682
683         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
684 }
685
686 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
687 {
688         struct be_tx_stats *stats = tx_stats(txo);
689         u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
690
691         u64_stats_update_begin(&stats->sync);
692         stats->tx_reqs++;
693         stats->tx_bytes += skb->len;
694         stats->tx_pkts += tx_pkts;
695         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
696                 stats->tx_vxlan_offload_pkts += tx_pkts;
697         u64_stats_update_end(&stats->sync);
698 }
699
700 /* Returns number of WRBs needed for the skb */
701 static u32 skb_wrb_cnt(struct sk_buff *skb)
702 {
703         /* +1 for the header wrb */
704         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
705 }
706
707 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
708 {
709         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
710         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
711         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
712         wrb->rsvd0 = 0;
713 }
714
715 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
716  * to avoid the swap and shift/mask operations in wrb_fill().
717  */
718 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
719 {
720         wrb->frag_pa_hi = 0;
721         wrb->frag_pa_lo = 0;
722         wrb->frag_len = 0;
723         wrb->rsvd0 = 0;
724 }
725
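/* Return the VLAN tag to use for a TX skb; if the skb's priority is not in
 * the available priority bitmap, substitute the recommended priority bits.
 */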
726 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
727                                      struct sk_buff *skb)
728 {
729         u8 vlan_prio;
730         u16 vlan_tag;
731
732         vlan_tag = skb_vlan_tag_get(skb);
733         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
734         /* If vlan priority provided by OS is NOT in available bmap */
735         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
736                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
737                                 adapter->recommended_prio_bits;
738
739         return vlan_tag;
740 }
741
742 /* Used only for IP tunnel packets */
743 static u16 skb_inner_ip_proto(struct sk_buff *skb)
744 {
745         return (inner_ip_hdr(skb)->version == 4) ?
746                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
747 }
748
749 static u16 skb_ip_proto(struct sk_buff *skb)
750 {
751         return (ip_hdr(skb)->version == 4) ?
752                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
753 }
754
755 static inline bool be_is_txq_full(struct be_tx_obj *txo)
756 {
757         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
758 }
759
760 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
761 {
762         return atomic_read(&txo->q.used) < txo->q.len / 2;
763 }
764
765 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
766 {
767         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
768 }
769
770 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
771                                        struct sk_buff *skb,
772                                        struct be_wrb_params *wrb_params)
773 {
774         u16 proto;
775
776         if (skb_is_gso(skb)) {
777                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
778                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
779                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
780                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
781         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
782                 if (skb->encapsulation) {
783                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
784                         proto = skb_inner_ip_proto(skb);
785                 } else {
786                         proto = skb_ip_proto(skb);
787                 }
788                 if (proto == IPPROTO_TCP)
789                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
790                 else if (proto == IPPROTO_UDP)
791                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
792         }
793
794         if (skb_vlan_tag_present(skb)) {
795                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
796                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
797         }
798
799         BE_WRB_F_SET(wrb_params->features, CRC, 1);
800 }
801
802 static void wrb_fill_hdr(struct be_adapter *adapter,
803                          struct be_eth_hdr_wrb *hdr,
804                          struct be_wrb_params *wrb_params,
805                          struct sk_buff *skb)
806 {
807         memset(hdr, 0, sizeof(*hdr));
808
809         SET_TX_WRB_HDR_BITS(crc, hdr,
810                             BE_WRB_F_GET(wrb_params->features, CRC));
811         SET_TX_WRB_HDR_BITS(ipcs, hdr,
812                             BE_WRB_F_GET(wrb_params->features, IPCS));
813         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
814                             BE_WRB_F_GET(wrb_params->features, TCPCS));
815         SET_TX_WRB_HDR_BITS(udpcs, hdr,
816                             BE_WRB_F_GET(wrb_params->features, UDPCS));
817
818         SET_TX_WRB_HDR_BITS(lso, hdr,
819                             BE_WRB_F_GET(wrb_params->features, LSO));
820         SET_TX_WRB_HDR_BITS(lso6, hdr,
821                             BE_WRB_F_GET(wrb_params->features, LSO6));
822         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
823
824         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
825          * hack is not needed, the evt bit is set while ringing DB.
826          */
827         SET_TX_WRB_HDR_BITS(event, hdr,
828                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
829         SET_TX_WRB_HDR_BITS(vlan, hdr,
830                             BE_WRB_F_GET(wrb_params->features, VLAN));
831         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
832
833         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
834         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
835         SET_TX_WRB_HDR_BITS(mgmt, hdr,
836                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
837 }
838
839 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
840                           bool unmap_single)
841 {
842         dma_addr_t dma;
843         u32 frag_len = le32_to_cpu(wrb->frag_len);
844
845
846         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
847                 (u64)le32_to_cpu(wrb->frag_pa_lo);
848         if (frag_len) {
849                 if (unmap_single)
850                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
851                 else
852                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
853         }
854 }
855
856 /* Grab a WRB header for xmit */
857 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
858 {
859         u32 head = txo->q.head;
860
861         queue_head_inc(&txo->q);
862         return head;
863 }
864
865 /* Set up the WRB header for xmit */
866 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
867                                 struct be_tx_obj *txo,
868                                 struct be_wrb_params *wrb_params,
869                                 struct sk_buff *skb, u16 head)
870 {
871         u32 num_frags = skb_wrb_cnt(skb);
872         struct be_queue_info *txq = &txo->q;
873         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
874
875         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
876         be_dws_cpu_to_le(hdr, sizeof(*hdr));
877
878         BUG_ON(txo->sent_skb_list[head]);
879         txo->sent_skb_list[head] = skb;
880         txo->last_req_hdr = head;
881         atomic_add(num_frags, &txq->used);
882         txo->last_req_wrb_cnt = num_frags;
883         txo->pend_wrb_cnt += num_frags;
884 }
885
886 /* Setup a WRB fragment (buffer descriptor) for xmit */
887 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
888                                  int len)
889 {
890         struct be_eth_wrb *wrb;
891         struct be_queue_info *txq = &txo->q;
892
893         wrb = queue_head_node(txq);
894         wrb_fill(wrb, busaddr, len);
895         queue_head_inc(txq);
896 }
897
898 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
899  * was invoked. The producer index is restored to the previous packet and the
900  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
901  */
902 static void be_xmit_restore(struct be_adapter *adapter,
903                             struct be_tx_obj *txo, u32 head, bool map_single,
904                             u32 copied)
905 {
906         struct device *dev;
907         struct be_eth_wrb *wrb;
908         struct be_queue_info *txq = &txo->q;
909
910         dev = &adapter->pdev->dev;
911         txq->head = head;
912
913         /* skip the first wrb (hdr); it's not mapped */
914         queue_head_inc(txq);
915         while (copied) {
916                 wrb = queue_head_node(txq);
917                 unmap_tx_frag(dev, wrb, map_single);
918                 map_single = false;
919                 copied -= le32_to_cpu(wrb->frag_len);
920                 queue_head_inc(txq);
921         }
922
923         txq->head = head;
924 }
925
926 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
927  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
928  * of WRBs used up by the packet.
929  */
930 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
931                            struct sk_buff *skb,
932                            struct be_wrb_params *wrb_params)
933 {
934         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
935         struct device *dev = &adapter->pdev->dev;
936         struct be_queue_info *txq = &txo->q;
937         bool map_single = false;
938         u32 head = txq->head;
939         dma_addr_t busaddr;
940         int len;
941
942         head = be_tx_get_wrb_hdr(txo);
943
944         if (skb->len > skb->data_len) {
945                 len = skb_headlen(skb);
946
947                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
948                 if (dma_mapping_error(dev, busaddr))
949                         goto dma_err;
950                 map_single = true;
951                 be_tx_setup_wrb_frag(txo, busaddr, len);
952                 copied += len;
953         }
954
955         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
956                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
957                 len = skb_frag_size(frag);
958
959                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
960                 if (dma_mapping_error(dev, busaddr))
961                         goto dma_err;
962                 be_tx_setup_wrb_frag(txo, busaddr, len);
963                 copied += len;
964         }
965
966         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
967
968         be_tx_stats_update(txo, skb);
969         return wrb_cnt;
970
971 dma_err:
972         adapter->drv_stats.dma_map_errors++;
973         be_xmit_restore(adapter, txo, head, map_single, copied);
974         return 0;
975 }
976
977 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
978 {
979         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
980 }
981
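/* Insert the VLAN tag (and the outer QnQ tag, if configured) into the packet
 * data itself, so that HW VLAN insertion can be skipped.
 */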
982 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
983                                              struct sk_buff *skb,
984                                              struct be_wrb_params
985                                              *wrb_params)
986 {
987         u16 vlan_tag = 0;
988
989         skb = skb_share_check(skb, GFP_ATOMIC);
990         if (unlikely(!skb))
991                 return skb;
992
993         if (skb_vlan_tag_present(skb))
994                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
995
996         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
997                 if (!vlan_tag)
998                         vlan_tag = adapter->pvid;
999                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W
1000                  * to skip VLAN insertion
1001                  */
1002                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1003         }
1004
1005         if (vlan_tag) {
1006                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1007                                                 vlan_tag);
1008                 if (unlikely(!skb))
1009                         return skb;
1010                 skb->vlan_tci = 0;
1011         }
1012
1013         /* Insert the outer VLAN, if any */
1014         if (adapter->qnq_vid) {
1015                 vlan_tag = adapter->qnq_vid;
1016                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1017                                                 vlan_tag);
1018                 if (unlikely(!skb))
1019                         return skb;
1020                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1021         }
1022
1023         return skb;
1024 }
1025
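/* Detect IPv6 pkts that carry a non-TCP/UDP extension header with a 0xff
 * hdrlen byte; such pkts can lock up the HW on BE3 when HW VLAN tagging
 * is requested (see be_ipv6_tx_stall_chk()).
 */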
1026 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1027 {
1028         struct ethhdr *eh = (struct ethhdr *)skb->data;
1029         u16 offset = ETH_HLEN;
1030
1031         if (eh->h_proto == htons(ETH_P_IPV6)) {
1032                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1033
1034                 offset += sizeof(struct ipv6hdr);
1035                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1036                     ip6h->nexthdr != NEXTHDR_UDP) {
1037                         struct ipv6_opt_hdr *ehdr =
1038                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1039
1040                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1041                         if (ehdr->hdrlen == 0xff)
1042                                 return true;
1043                 }
1044         }
1045         return false;
1046 }
1047
1048 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1049 {
1050         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1051 }
1052
1053 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1054 {
1055         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1056 }
1057
1058 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1059                                                   struct sk_buff *skb,
1060                                                   struct be_wrb_params
1061                                                   *wrb_params)
1062 {
1063         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1064         unsigned int eth_hdr_len;
1065         struct iphdr *ip;
1066
1067         /* For padded packets, BE HW modifies tot_len field in IP header
1068          * incorrectly when VLAN tag is inserted by HW.
1069          * For padded packets, Lancer computes incorrect checksum.
1070          */
1071         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1072                                                 VLAN_ETH_HLEN : ETH_HLEN;
1073         if (skb->len <= 60 &&
1074             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1075             is_ipv4_pkt(skb)) {
1076                 ip = (struct iphdr *)ip_hdr(skb);
1077                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1078         }
1079
1080         /* If vlan tag is already inlined in the packet, skip HW VLAN
1081          * tagging in pvid-tagging mode
1082          */
1083         if (be_pvid_tagging_enabled(adapter) &&
1084             veh->h_vlan_proto == htons(ETH_P_8021Q))
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086
1087         /* HW has a bug wherein it will calculate CSUM for VLAN
1088          * pkts even though it is disabled.
1089          * Manually insert VLAN in pkt.
1090          */
1091         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1092             skb_vlan_tag_present(skb)) {
1093                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1094                 if (unlikely(!skb))
1095                         goto err;
1096         }
1097
1098         /* HW may lockup when VLAN HW tagging is requested on
1099          * certain ipv6 packets. Drop such pkts if the HW workaround to
1100          * skip HW tagging is not enabled by FW.
1101          */
1102         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1103                      (adapter->pvid || adapter->qnq_vid) &&
1104                      !qnq_async_evt_rcvd(adapter)))
1105                 goto tx_drop;
1106
1107         /* Manual VLAN tag insertion to prevent:
1108          * ASIC lockup when the ASIC inserts VLAN tag into
1109          * certain ipv6 packets. Insert VLAN tags in driver,
1110          * and set event, completion, vlan bits accordingly
1111          * in the Tx WRB.
1112          */
1113         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1114             be_vlan_tag_tx_chk(adapter, skb)) {
1115                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1116                 if (unlikely(!skb))
1117                         goto err;
1118         }
1119
1120         return skb;
1121 tx_drop:
1122         dev_kfree_skb_any(skb);
1123 err:
1124         return NULL;
1125 }
1126
1127 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1128                                            struct sk_buff *skb,
1129                                            struct be_wrb_params *wrb_params)
1130 {
1131         int err;
1132
1133         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1134          * packets that are 32b or less may cause a transmit stall
1135          * on that port. The workaround is to pad such packets
1136          * (len <= 32 bytes) to a minimum length of 36b.
1137          */
1138         if (skb->len <= 32) {
1139                 if (skb_put_padto(skb, 36))
1140                         return NULL;
1141         }
1142
1143         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1144                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1145                 if (!skb)
1146                         return NULL;
1147         }
1148
1149         /* The stack can send us skbs with length greater than
1150          * what the HW can handle. Trim the extra bytes.
1151          */
1152         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1153         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1154         WARN_ON(err);
1155
1156         return skb;
1157 }
1158
1159 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1160 {
1161         struct be_queue_info *txq = &txo->q;
1162         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1163
1164         /* Mark the last request eventable if it hasn't been marked already */
1165         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1166                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1167
1168         /* compose a dummy wrb if there are odd set of wrbs to notify */
1169         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1170                 wrb_fill_dummy(queue_head_node(txq));
1171                 queue_head_inc(txq);
1172                 atomic_inc(&txq->used);
1173                 txo->pend_wrb_cnt++;
1174                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1175                                            TX_HDR_WRB_NUM_SHIFT);
1176                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1177                                           TX_HDR_WRB_NUM_SHIFT);
1178         }
1179         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1180         txo->pend_wrb_cnt = 0;
1181 }
1182
1183 /* OS2BMC related */
1184
1185 #define DHCP_CLIENT_PORT        68
1186 #define DHCP_SERVER_PORT        67
1187 #define NET_BIOS_PORT1          137
1188 #define NET_BIOS_PORT2          138
1189 #define DHCPV6_RAS_PORT         547
1190
1191 #define is_mc_allowed_on_bmc(adapter, eh)       \
1192         (!is_multicast_filt_enabled(adapter) && \
1193          is_multicast_ether_addr(eh->h_dest) && \
1194          !is_broadcast_ether_addr(eh->h_dest))
1195
1196 #define is_bc_allowed_on_bmc(adapter, eh)       \
1197         (!is_broadcast_filt_enabled(adapter) && \
1198          is_broadcast_ether_addr(eh->h_dest))
1199
1200 #define is_arp_allowed_on_bmc(adapter, skb)     \
1201         (is_arp(skb) && is_arp_filt_enabled(adapter))
1202
1203 #define is_broadcast_packet(eh, adapter)        \
1204                 (is_multicast_ether_addr(eh->h_dest) && \
1205                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1206
1207 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1208
1209 #define is_arp_filt_enabled(adapter)    \
1210                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1211
1212 #define is_dhcp_client_filt_enabled(adapter)    \
1213                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1214
1215 #define is_dhcp_srvr_filt_enabled(adapter)      \
1216                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1217
1218 #define is_nbios_filt_enabled(adapter)  \
1219                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1220
1221 #define is_ipv6_na_filt_enabled(adapter)        \
1222                 (adapter->bmc_filt_mask &       \
1223                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1224
1225 #define is_ipv6_ra_filt_enabled(adapter)        \
1226                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1227
1228 #define is_ipv6_ras_filt_enabled(adapter)       \
1229                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1230
1231 #define is_broadcast_filt_enabled(adapter)      \
1232                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1233
1234 #define is_multicast_filt_enabled(adapter)      \
1235                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1236
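/* Decide whether the pkt must also be sent to the BMC (OS2BMC): multicast,
 * broadcast, ARP, ICMPv6 RA/NA and select UDP pkts qualify, depending on
 * the BMC filtering mask.
 */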
1237 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1238                                struct sk_buff **skb)
1239 {
1240         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1241         bool os2bmc = false;
1242
1243         if (!be_is_os2bmc_enabled(adapter))
1244                 goto done;
1245
1246         if (!is_multicast_ether_addr(eh->h_dest))
1247                 goto done;
1248
1249         if (is_mc_allowed_on_bmc(adapter, eh) ||
1250             is_bc_allowed_on_bmc(adapter, eh) ||
1251             is_arp_allowed_on_bmc(adapter, (*skb))) {
1252                 os2bmc = true;
1253                 goto done;
1254         }
1255
1256         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1257                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1258                 u8 nexthdr = hdr->nexthdr;
1259
1260                 if (nexthdr == IPPROTO_ICMPV6) {
1261                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1262
1263                         switch (icmp6->icmp6_type) {
1264                         case NDISC_ROUTER_ADVERTISEMENT:
1265                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1266                                 goto done;
1267                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1268                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1269                                 goto done;
1270                         default:
1271                                 break;
1272                         }
1273                 }
1274         }
1275
1276         if (is_udp_pkt((*skb))) {
1277                 struct udphdr *udp = udp_hdr((*skb));
1278
1279                 switch (ntohs(udp->dest)) {
1280                 case DHCP_CLIENT_PORT:
1281                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1282                         goto done;
1283                 case DHCP_SERVER_PORT:
1284                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1285                         goto done;
1286                 case NET_BIOS_PORT1:
1287                 case NET_BIOS_PORT2:
1288                         os2bmc = is_nbios_filt_enabled(adapter);
1289                         goto done;
1290                 case DHCPV6_RAS_PORT:
1291                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1292                         goto done;
1293                 default:
1294                         break;
1295                 }
1296         }
1297 done:
1298         /* For VLAN packets destined to the BMC, the ASIC expects
1299          * the VLAN tag to be inline in the packet.
1300          */
1301         if (os2bmc)
1302                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1303
1304         return os2bmc;
1305 }
1306
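/* ndo_start_xmit handler: apply HW workarounds, map and enqueue the pkt's
 * WRBs (a second time with the mgmt bit set if it must also reach the BMC)
 * and ring the TX doorbell when flushing.
 */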
1307 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1308 {
1309         struct be_adapter *adapter = netdev_priv(netdev);
1310         u16 q_idx = skb_get_queue_mapping(skb);
1311         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1312         struct be_wrb_params wrb_params = { 0 };
1313         bool flush = !skb->xmit_more;
1314         u16 wrb_cnt;
1315
1316         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1317         if (unlikely(!skb))
1318                 goto drop;
1319
1320         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1321
1322         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1323         if (unlikely(!wrb_cnt)) {
1324                 dev_kfree_skb_any(skb);
1325                 goto drop;
1326         }
1327
1328         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1329          * enqueue the pkt a 2nd time with the mgmt bit set.
1330          */
1331         if (be_send_pkt_to_bmc(adapter, &skb)) {
1332                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1333                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1334                 if (unlikely(!wrb_cnt))
1335                         goto drop;
1336                 else
1337                         skb_get(skb);
1338         }
1339
1340         if (be_is_txq_full(txo)) {
1341                 netif_stop_subqueue(netdev, q_idx);
1342                 tx_stats(txo)->tx_stops++;
1343         }
1344
1345         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1346                 be_xmit_flush(adapter, txo);
1347
1348         return NETDEV_TX_OK;
1349 drop:
1350         tx_stats(txo)->tx_drv_drops++;
1351         /* Flush the already enqueued tx requests */
1352         if (flush && txo->pend_wrb_cnt)
1353                 be_xmit_flush(adapter, txo);
1354
1355         return NETDEV_TX_OK;
1356 }
1357
1358 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1359 {
1360         struct be_adapter *adapter = netdev_priv(netdev);
1361         struct device *dev = &adapter->pdev->dev;
1362
1363         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1364                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1365                          BE_MIN_MTU, BE_MAX_MTU);
1366                 return -EINVAL;
1367         }
1368
1369         dev_info(dev, "MTU changed from %d to %d bytes\n",
1370                  netdev->mtu, new_mtu);
1371         netdev->mtu = new_mtu;
1372         return 0;
1373 }
1374
1375 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1376 {
1377         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1378                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1379 }
1380
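/* Put the interface into VLAN promiscuous mode, if not already enabled */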
1381 static int be_set_vlan_promisc(struct be_adapter *adapter)
1382 {
1383         struct device *dev = &adapter->pdev->dev;
1384         int status;
1385
1386         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1387                 return 0;
1388
1389         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1390         if (!status) {
1391                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1392                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1393         } else {
1394                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1395         }
1396         return status;
1397 }
1398
1399 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1400 {
1401         struct device *dev = &adapter->pdev->dev;
1402         int status;
1403
1404         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1405         if (!status) {
1406                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1407                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1408         }
1409         return status;
1410 }
1411
1412 /*
1413  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1414  * If the user configures more, place BE in vlan promiscuous mode.
1415  */
1416 static int be_vid_config(struct be_adapter *adapter)
1417 {
1418         struct device *dev = &adapter->pdev->dev;
1419         u16 vids[BE_NUM_VLANS_SUPPORTED];
1420         u16 num = 0, i = 0;
1421         int status = 0;
1422
1423         /* No need to further configure vids if in promiscuous mode */
1424         if (be_in_all_promisc(adapter))
1425                 return 0;
1426
1427         if (adapter->vlans_added > be_max_vlans(adapter))
1428                 return be_set_vlan_promisc(adapter);
1429
1430         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1431                 status = be_clear_vlan_promisc(adapter);
1432                 if (status)
1433                         return status;
1434         }
1435         /* Construct VLAN Table to give to HW */
1436         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1437                 vids[num++] = cpu_to_le16(i);
1438
1439         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1440         if (status) {
1441                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1442                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1443                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1444                     addl_status(status) ==
1445                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1446                         return be_set_vlan_promisc(adapter);
1447         }
1448         return status;
1449 }
1450
1451 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1452 {
1453         struct be_adapter *adapter = netdev_priv(netdev);
1454         int status = 0;
1455
1456         /* Packets with VID 0 are always received by Lancer by default */
1457         if (lancer_chip(adapter) && vid == 0)
1458                 return status;
1459
1460         if (test_bit(vid, adapter->vids))
1461                 return status;
1462
1463         set_bit(vid, adapter->vids);
1464         adapter->vlans_added++;
1465
1466         status = be_vid_config(adapter);
1467         if (status) {
1468                 adapter->vlans_added--;
1469                 clear_bit(vid, adapter->vids);
1470         }
1471
1472         return status;
1473 }
1474
1475 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1476 {
1477         struct be_adapter *adapter = netdev_priv(netdev);
1478
1479         /* Packets with VID 0 are always received by Lancer by default */
1480         if (lancer_chip(adapter) && vid == 0)
1481                 return 0;
1482
1483         if (!test_bit(vid, adapter->vids))
1484                 return 0;
1485
1486         clear_bit(vid, adapter->vids);
1487         adapter->vlans_added--;
1488
1489         return be_vid_config(adapter);
1490 }
1491
1492 static void be_clear_all_promisc(struct be_adapter *adapter)
1493 {
1494         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF);
1495         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
1496 }
1497
1498 static void be_set_all_promisc(struct be_adapter *adapter)
1499 {
1500         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1501         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1502 }
1503
1504 static void be_set_mc_promisc(struct be_adapter *adapter)
1505 {
1506         int status;
1507
1508         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1509                 return;
1510
1511         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1512         if (!status)
1513                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1514 }
1515
1516 static void be_set_mc_list(struct be_adapter *adapter)
1517 {
1518         int status;
1519
1520         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1521         if (!status)
1522                 adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1523         else
1524                 be_set_mc_promisc(adapter);
1525 }
1526
1527 static void be_set_uc_list(struct be_adapter *adapter)
1528 {
1529         struct netdev_hw_addr *ha;
1530         int i = 1; /* First slot is claimed by the Primary MAC */
1531
1532         for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
1533                 be_cmd_pmac_del(adapter, adapter->if_handle,
1534                                 adapter->pmac_id[i], 0);
1535
1536         if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) {
1537                 be_set_all_promisc(adapter);
1538                 return;
1539         }
1540
1541         netdev_for_each_uc_addr(ha, adapter->netdev) {
1542                 adapter->uc_macs++; /* First slot is for Primary MAC */
1543                 be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle,
1544                                 &adapter->pmac_id[adapter->uc_macs], 0);
1545         }
1546 }
1547
1548 static void be_clear_uc_list(struct be_adapter *adapter)
1549 {
1550         int i;
1551
1552         for (i = 1; i < (adapter->uc_macs + 1); i++)
1553                 be_cmd_pmac_del(adapter, adapter->if_handle,
1554                                 adapter->pmac_id[i], 0);
1555         adapter->uc_macs = 0;
1556 }
1557
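/* ndo_set_rx_mode handler: program the RX filter from the netdev flags and
 * the unicast/multicast address lists, falling back to the appropriate
 * promiscuous mode whenever the interface's filter limits are exceeded.
 */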
1558 static void be_set_rx_mode(struct net_device *netdev)
1559 {
1560         struct be_adapter *adapter = netdev_priv(netdev);
1561
1562         if (netdev->flags & IFF_PROMISC) {
1563                 be_set_all_promisc(adapter);
1564                 return;
1565         }
1566
1567         /* Interface was previously in promiscuous mode; disable it */
1568         if (be_in_all_promisc(adapter)) {
1569                 be_clear_all_promisc(adapter);
1570                 if (adapter->vlans_added)
1571                         be_vid_config(adapter);
1572         }
1573
1574         /* Enable multicast promisc if num configured exceeds what we support */
1575         if (netdev->flags & IFF_ALLMULTI ||
1576             netdev_mc_count(netdev) > be_max_mc(adapter)) {
1577                 be_set_mc_promisc(adapter);
1578                 return;
1579         }
1580
1581         if (netdev_uc_count(netdev) != adapter->uc_macs)
1582                 be_set_uc_list(adapter);
1583
1584         be_set_mc_list(adapter);
1585 }
1586
1587 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1588 {
1589         struct be_adapter *adapter = netdev_priv(netdev);
1590         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1591         int status;
1592
1593         if (!sriov_enabled(adapter))
1594                 return -EPERM;
1595
1596         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1597                 return -EINVAL;
1598
1599         /* Proceed further only if the user-provided MAC differs from the
1600          * currently active MAC
1601          */
1602         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1603                 return 0;
1604
1605         if (BEx_chip(adapter)) {
1606                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1607                                 vf + 1);
1608
1609                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1610                                          &vf_cfg->pmac_id, vf + 1);
1611         } else {
1612                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1613                                         vf + 1);
1614         }
1615
1616         if (status) {
1617                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1618                         mac, vf, status);
1619                 return be_cmd_status(status);
1620         }
1621
1622         ether_addr_copy(vf_cfg->mac_addr, mac);
1623
1624         return 0;
1625 }
1626
1627 static int be_get_vf_config(struct net_device *netdev, int vf,
1628                             struct ifla_vf_info *vi)
1629 {
1630         struct be_adapter *adapter = netdev_priv(netdev);
1631         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1632
1633         if (!sriov_enabled(adapter))
1634                 return -EPERM;
1635
1636         if (vf >= adapter->num_vfs)
1637                 return -EINVAL;
1638
1639         vi->vf = vf;
1640         vi->max_tx_rate = vf_cfg->tx_rate;
1641         vi->min_tx_rate = 0;
1642         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1643         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1644         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1645         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1646         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1647
1648         return 0;
1649 }
1650
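/* Enable Transparent VLAN Tagging (TVT) on a VF: program the VLAN into the
 * switch config, clear any guest-programmed VLAN filters and revoke the VF's
 * FILTMGMT privilege so it can no longer program VLAN filters of its own.
 */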
1651 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1652 {
1653         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1654         u16 vids[BE_NUM_VLANS_SUPPORTED];
1655         int vf_if_id = vf_cfg->if_handle;
1656         int status;
1657
1658         /* Enable Transparent VLAN Tagging */
1659         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1660         if (status)
1661                 return status;
1662
1663         /* With TVT enabled, clear any VLAN filters pre-programmed on the VF */
1664         vids[0] = 0;
1665         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1666         if (!status)
1667                 dev_info(&adapter->pdev->dev,
1668                          "Cleared guest VLANs on VF%d\n", vf);
1669
1670         /* After TVT is enabled, disallow VFs to program VLAN filters */
1671         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1672                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1673                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1674                 if (!status)
1675                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1676         }
1677         return 0;
1678 }
1679
1680 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1681 {
1682         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1683         struct device *dev = &adapter->pdev->dev;
1684         int status;
1685
1686         /* Reset Transparent VLAN Tagging. */
1687         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1688                                        vf_cfg->if_handle, 0, 0);
1689         if (status)
1690                 return status;
1691
1692         /* Allow VFs to program VLAN filtering */
1693         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1694                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1695                                                   BE_PRIV_FILTMGMT, vf + 1);
1696                 if (!status) {
1697                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1698                         dev_info(dev, "VF%d: FILTMGMT priv enabled\n", vf);
1699                 }
1700         }
1701
1702         dev_info(dev,
1703                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag\n");
1704         return 0;
1705 }
1706
1707 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1708 {
1709         struct be_adapter *adapter = netdev_priv(netdev);
1710         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1711         int status;
1712
1713         if (!sriov_enabled(adapter))
1714                 return -EPERM;
1715
1716         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1717                 return -EINVAL;
1718
1719         if (vlan || qos) {
1720                 vlan |= qos << VLAN_PRIO_SHIFT;
1721                 status = be_set_vf_tvt(adapter, vf, vlan);
1722         } else {
1723                 status = be_clear_vf_tvt(adapter, vf);
1724         }
1725
1726         if (status) {
1727                 dev_err(&adapter->pdev->dev,
1728                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1729                         status);
1730                 return be_cmd_status(status);
1731         }
1732
1733         vf_cfg->vlan_tag = vlan;
1734         return 0;
1735 }
1736
1737 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1738                              int min_tx_rate, int max_tx_rate)
1739 {
1740         struct be_adapter *adapter = netdev_priv(netdev);
1741         struct device *dev = &adapter->pdev->dev;
1742         int percent_rate, status = 0;
1743         u16 link_speed = 0;
1744         u8 link_status;
1745
1746         if (!sriov_enabled(adapter))
1747                 return -EPERM;
1748
1749         if (vf >= adapter->num_vfs)
1750                 return -EINVAL;
1751
1752         if (min_tx_rate)
1753                 return -EINVAL;
1754
1755         if (!max_tx_rate)
1756                 goto config_qos;
1757
1758         status = be_cmd_link_status_query(adapter, &link_speed,
1759                                           &link_status, 0);
1760         if (status)
1761                 goto err;
1762
1763         if (!link_status) {
1764                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1765                 status = -ENETDOWN;
1766                 goto err;
1767         }
1768
1769         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1770                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1771                         link_speed);
1772                 status = -EINVAL;
1773                 goto err;
1774         }
1775
1776         /* On Skyhawk the QoS setting must be specified as a % of link speed */
1777         percent_rate = link_speed / 100;
1778         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1779                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1780                         percent_rate);
1781                 status = -EINVAL;
1782                 goto err;
1783         }
1784
1785 config_qos:
1786         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1787         if (status)
1788                 goto err;
1789
1790         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1791         return 0;
1792
1793 err:
1794         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1795                 max_tx_rate, vf);
1796         return be_cmd_status(status);
1797 }
1798
1799 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1800                                 int link_state)
1801 {
1802         struct be_adapter *adapter = netdev_priv(netdev);
1803         int status;
1804
1805         if (!sriov_enabled(adapter))
1806                 return -EPERM;
1807
1808         if (vf >= adapter->num_vfs)
1809                 return -EINVAL;
1810
1811         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
1812         if (status) {
1813                 dev_err(&adapter->pdev->dev,
1814                         "Link state change on VF %d failed: %#x\n", vf, status);
1815                 return be_cmd_status(status);
1816         }
1817
1818         adapter->vf_cfg[vf].plink_tracking = link_state;
1819
1820         return 0;
1821 }
1822
1823 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
1824 {
1825         struct be_adapter *adapter = netdev_priv(netdev);
1826         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1827         u8 spoofchk;
1828         int status;
1829
1830         if (!sriov_enabled(adapter))
1831                 return -EPERM;
1832
1833         if (vf >= adapter->num_vfs)
1834                 return -EINVAL;
1835
1836         if (BEx_chip(adapter))
1837                 return -EOPNOTSUPP;
1838
1839         if (enable == vf_cfg->spoofchk)
1840                 return 0;
1841
1842         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
1843
1844         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
1845                                        0, spoofchk);
1846         if (status) {
1847                 dev_err(&adapter->pdev->dev,
1848                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
1849                 return be_cmd_status(status);
1850         }
1851
1852         vf_cfg->spoofchk = enable;
1853         return 0;
1854 }
1855
1856 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
1857                           ulong now)
1858 {
1859         aic->rx_pkts_prev = rx_pkts;
1860         aic->tx_reqs_prev = tx_pkts;
1861         aic->jiffies = now;
1862 }
1863
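/* Derive a new EQ delay (interrupt coalescing value) for this EQ from the
 * aggregate RX/TX packet rate seen since the last sample. When adaptive
 * coalescing is disabled, the configured et_eqd value is returned as-is.
 */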
1864 static int be_get_new_eqd(struct be_eq_obj *eqo)
1865 {
1866         struct be_adapter *adapter = eqo->adapter;
1867         int eqd, start;
1868         struct be_aic_obj *aic;
1869         struct be_rx_obj *rxo;
1870         struct be_tx_obj *txo;
1871         u64 rx_pkts = 0, tx_pkts = 0;
1872         ulong now;
1873         u32 pps, delta;
1874         int i;
1875
1876         aic = &adapter->aic_obj[eqo->idx];
1877         if (!aic->enable) {
1878                 if (aic->jiffies)
1879                         aic->jiffies = 0;
1880                 eqd = aic->et_eqd;
1881                 return eqd;
1882         }
1883
1884         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
1885                 do {
1886                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
1887                         rx_pkts += rxo->stats.rx_pkts;
1888                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
1889         }
1890
1891         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
1892                 do {
1893                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
1894                         tx_pkts += txo->stats.tx_reqs;
1895                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
1896         }
1897
1898         /* Skip if the counters wrapped around or this is the first calculation */
1899         now = jiffies;
1900         if (!aic->jiffies || time_before(now, aic->jiffies) ||
1901             rx_pkts < aic->rx_pkts_prev ||
1902             tx_pkts < aic->tx_reqs_prev) {
1903                 be_aic_update(aic, rx_pkts, tx_pkts, now);
1904                 return aic->prev_eqd;
1905         }
1906
1907         delta = jiffies_to_msecs(now - aic->jiffies);
1908         if (delta == 0)
1909                 return aic->prev_eqd;
1910
1911         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
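        /* Packets per second since the last sample; the delay grows by
         * roughly 4 units per 15K pps and is clamped to [min_eqd, max_eqd].
         */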
1912                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
1913         eqd = (pps / 15000) << 2;
1914
1915         if (eqd < 8)
1916                 eqd = 0;
1917         eqd = min_t(u32, eqd, aic->max_eqd);
1918         eqd = max_t(u32, eqd, aic->min_eqd);
1919
1920         be_aic_update(aic, rx_pkts, tx_pkts, now);
1921
1922         return eqd;
1923 }
1924
1925 /* For Skyhawk-R only */
1926 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
1927 {
1928         struct be_adapter *adapter = eqo->adapter;
1929         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
1930         ulong now = jiffies;
1931         int eqd;
1932         u32 mult_enc;
1933
1934         if (!aic->enable)
1935                 return 0;
1936
1937         if (jiffies_to_msecs(now - aic->jiffies) < 1)
1938                 eqd = aic->prev_eqd;
1939         else
1940                 eqd = be_get_new_eqd(eqo);
1941
1942         if (eqd > 100)
1943                 mult_enc = R2I_DLY_ENC_1;
1944         else if (eqd > 60)
1945                 mult_enc = R2I_DLY_ENC_2;
1946         else if (eqd > 20)
1947                 mult_enc = R2I_DLY_ENC_3;
1948         else
1949                 mult_enc = R2I_DLY_ENC_0;
1950
1951         aic->prev_eqd = eqd;
1952
1953         return mult_enc;
1954 }
1955
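/* Recompute the EQ delay for every event queue and issue one MODIFY_EQ_DELAY
 * command covering the queues whose delay changed (or all of them when
 * force_update is set).
 */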
1956 void be_eqd_update(struct be_adapter *adapter, bool force_update)
1957 {
1958         struct be_set_eqd set_eqd[MAX_EVT_QS];
1959         struct be_aic_obj *aic;
1960         struct be_eq_obj *eqo;
1961         int i, num = 0, eqd;
1962
1963         for_all_evt_queues(adapter, eqo, i) {
1964                 aic = &adapter->aic_obj[eqo->idx];
1965                 eqd = be_get_new_eqd(eqo);
1966                 if (force_update || eqd != aic->prev_eqd) {
1967                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
1968                         set_eqd[num].eq_id = eqo->q.id;
1969                         aic->prev_eqd = eqd;
1970                         num++;
1971                 }
1972         }
1973
1974         if (num)
1975                 be_cmd_modify_eqd(adapter, set_eqd, num);
1976 }
1977
1978 static void be_rx_stats_update(struct be_rx_obj *rxo,
1979                                struct be_rx_compl_info *rxcp)
1980 {
1981         struct be_rx_stats *stats = rx_stats(rxo);
1982
1983         u64_stats_update_begin(&stats->sync);
1984         stats->rx_compl++;
1985         stats->rx_bytes += rxcp->pkt_size;
1986         stats->rx_pkts++;
1987         if (rxcp->tunneled)
1988                 stats->rx_vxlan_offload_pkts++;
1989         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
1990                 stats->rx_mcast_pkts++;
1991         if (rxcp->err)
1992                 stats->rx_compl_err++;
1993         u64_stats_update_end(&stats->sync);
1994 }
1995
1996 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
1997 {
1998         /* L4 checksum is not reliable for non-TCP/UDP packets.
1999          * Also ignore ipcksm for ipv6 pkts
2000          */
2001         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2002                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2003 }
2004
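/* Consume the RX buffer at the queue tail: unmap the backing page on its
 * last fragment, otherwise just sync the single fragment for CPU access.
 */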
2005 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2006 {
2007         struct be_adapter *adapter = rxo->adapter;
2008         struct be_rx_page_info *rx_page_info;
2009         struct be_queue_info *rxq = &rxo->q;
2010         u32 frag_idx = rxq->tail;
2011
2012         rx_page_info = &rxo->page_info_tbl[frag_idx];
2013         BUG_ON(!rx_page_info->page);
2014
2015         if (rx_page_info->last_frag) {
2016                 dma_unmap_page(&adapter->pdev->dev,
2017                                dma_unmap_addr(rx_page_info, bus),
2018                                adapter->big_page_size, DMA_FROM_DEVICE);
2019                 rx_page_info->last_frag = false;
2020         } else {
2021                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2022                                         dma_unmap_addr(rx_page_info, bus),
2023                                         rx_frag_size, DMA_FROM_DEVICE);
2024         }
2025
2026         queue_tail_inc(rxq);
2027         atomic_dec(&rxq->used);
2028         return rx_page_info;
2029 }
2030
2031 /* Throw away the data in the Rx completion */
2032 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2033                                 struct be_rx_compl_info *rxcp)
2034 {
2035         struct be_rx_page_info *page_info;
2036         u16 i, num_rcvd = rxcp->num_rcvd;
2037
2038         for (i = 0; i < num_rcvd; i++) {
2039                 page_info = get_rx_page_info(rxo);
2040                 put_page(page_info->page);
2041                 memset(page_info, 0, sizeof(*page_info));
2042         }
2043 }
2044
2045 /*
2046  * skb_fill_rx_data forms a complete skb for an ether frame
2047  * indicated by rxcp.
2048  */
2049 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2050                              struct be_rx_compl_info *rxcp)
2051 {
2052         struct be_rx_page_info *page_info;
2053         u16 i, j;
2054         u16 hdr_len, curr_frag_len, remaining;
2055         u8 *start;
2056
2057         page_info = get_rx_page_info(rxo);
2058         start = page_address(page_info->page) + page_info->page_offset;
2059         prefetch(start);
2060
2061         /* Copy data in the first descriptor of this completion */
2062         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2063
2064         skb->len = curr_frag_len;
2065         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2066                 memcpy(skb->data, start, curr_frag_len);
2067                 /* Complete packet has now been moved to data */
2068                 put_page(page_info->page);
2069                 skb->data_len = 0;
2070                 skb->tail += curr_frag_len;
2071         } else {
2072                 hdr_len = ETH_HLEN;
2073                 memcpy(skb->data, start, hdr_len);
2074                 skb_shinfo(skb)->nr_frags = 1;
2075                 skb_frag_set_page(skb, 0, page_info->page);
2076                 skb_shinfo(skb)->frags[0].page_offset =
2077                                         page_info->page_offset + hdr_len;
2078                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2079                                   curr_frag_len - hdr_len);
2080                 skb->data_len = curr_frag_len - hdr_len;
2081                 skb->truesize += rx_frag_size;
2082                 skb->tail += hdr_len;
2083         }
2084         page_info->page = NULL;
2085
2086         if (rxcp->pkt_size <= rx_frag_size) {
2087                 BUG_ON(rxcp->num_rcvd != 1);
2088                 return;
2089         }
2090
2091         /* More frags present for this completion */
2092         remaining = rxcp->pkt_size - curr_frag_len;
2093         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2094                 page_info = get_rx_page_info(rxo);
2095                 curr_frag_len = min(remaining, rx_frag_size);
2096
2097                 /* Coalesce all frags from the same physical page in one slot */
2098                 if (page_info->page_offset == 0) {
2099                         /* Fresh page */
2100                         j++;
2101                         skb_frag_set_page(skb, j, page_info->page);
2102                         skb_shinfo(skb)->frags[j].page_offset =
2103                                                         page_info->page_offset;
2104                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2105                         skb_shinfo(skb)->nr_frags++;
2106                 } else {
2107                         put_page(page_info->page);
2108                 }
2109
2110                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2111                 skb->len += curr_frag_len;
2112                 skb->data_len += curr_frag_len;
2113                 skb->truesize += rx_frag_size;
2114                 remaining -= curr_frag_len;
2115                 page_info->page = NULL;
2116         }
2117         BUG_ON(j > MAX_SKB_FRAGS);
2118 }
2119
2120 /* Process the RX completion indicated by rxcp when GRO is disabled */
2121 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2122                                 struct be_rx_compl_info *rxcp)
2123 {
2124         struct be_adapter *adapter = rxo->adapter;
2125         struct net_device *netdev = adapter->netdev;
2126         struct sk_buff *skb;
2127
2128         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2129         if (unlikely(!skb)) {
2130                 rx_stats(rxo)->rx_drops_no_skbs++;
2131                 be_rx_compl_discard(rxo, rxcp);
2132                 return;
2133         }
2134
2135         skb_fill_rx_data(rxo, skb, rxcp);
2136
2137         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2138                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2139         else
2140                 skb_checksum_none_assert(skb);
2141
2142         skb->protocol = eth_type_trans(skb, netdev);
2143         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2144         if (netdev->features & NETIF_F_RXHASH)
2145                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2146
2147         skb->csum_level = rxcp->tunneled;
2148         skb_mark_napi_id(skb, napi);
2149
2150         if (rxcp->vlanf)
2151                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2152
2153         netif_receive_skb(skb);
2154 }
2155
2156 /* Process the RX completion indicated by rxcp when GRO is enabled */
2157 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2158                                     struct napi_struct *napi,
2159                                     struct be_rx_compl_info *rxcp)
2160 {
2161         struct be_adapter *adapter = rxo->adapter;
2162         struct be_rx_page_info *page_info;
2163         struct sk_buff *skb = NULL;
2164         u16 remaining, curr_frag_len;
2165         u16 i, j;
2166
2167         skb = napi_get_frags(napi);
2168         if (!skb) {
2169                 be_rx_compl_discard(rxo, rxcp);
2170                 return;
2171         }
2172
2173         remaining = rxcp->pkt_size;
2174         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2175                 page_info = get_rx_page_info(rxo);
2176
2177                 curr_frag_len = min(remaining, rx_frag_size);
2178
2179                 /* Coalesce all frags from the same physical page in one slot */
2180                 if (i == 0 || page_info->page_offset == 0) {
2181                         /* First frag or Fresh page */
2182                         j++;
2183                         skb_frag_set_page(skb, j, page_info->page);
2184                         skb_shinfo(skb)->frags[j].page_offset =
2185                                                         page_info->page_offset;
2186                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2187                 } else {
2188                         put_page(page_info->page);
2189                 }
2190                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2191                 skb->truesize += rx_frag_size;
2192                 remaining -= curr_frag_len;
2193                 memset(page_info, 0, sizeof(*page_info));
2194         }
2195         BUG_ON(j > MAX_SKB_FRAGS);
2196
2197         skb_shinfo(skb)->nr_frags = j + 1;
2198         skb->len = rxcp->pkt_size;
2199         skb->data_len = rxcp->pkt_size;
2200         skb->ip_summed = CHECKSUM_UNNECESSARY;
2201         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2202         if (adapter->netdev->features & NETIF_F_RXHASH)
2203                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2204
2205         skb->csum_level = rxcp->tunneled;
2206
2207         if (rxcp->vlanf)
2208                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2209
2210         napi_gro_frags(napi);
2211 }
2212
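/* Decode a v1 (BE3 native mode) RX completion into the chip-independent
 * be_rx_compl_info form used by the rest of the RX path.
 */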
2213 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2214                                  struct be_rx_compl_info *rxcp)
2215 {
2216         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2217         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2218         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2219         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2220         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2221         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2222         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2223         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2224         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2225         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2226         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2227         if (rxcp->vlanf) {
2228                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2229                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2230         }
2231         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2232         rxcp->tunneled =
2233                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2234 }
2235
2236 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2237                                  struct be_rx_compl_info *rxcp)
2238 {
2239         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2240         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2241         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2242         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2243         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2244         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2245         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2246         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2247         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2248         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2249         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2250         if (rxcp->vlanf) {
2251                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2252                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2253         }
2254         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2255         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2256 }
2257
2258 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2259 {
2260         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2261         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2262         struct be_adapter *adapter = rxo->adapter;
2263
2264         /* For checking the valid bit it is Ok to use either definition as the
2265          * valid bit is at the same position in both v0 and v1 Rx compl */
2266         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2267                 return NULL;
2268
2269         rmb();
2270         be_dws_le_to_cpu(compl, sizeof(*compl));
2271
2272         if (adapter->be3_native)
2273                 be_parse_rx_compl_v1(compl, rxcp);
2274         else
2275                 be_parse_rx_compl_v0(compl, rxcp);
2276
2277         if (rxcp->ip_frag)
2278                 rxcp->l4_csum = 0;
2279
2280         if (rxcp->vlanf) {
2281                 /* In QNQ modes, if qnq bit is not set, then the packet was
2282                  * tagged only with the transparent outer vlan-tag and must
2283                  * not be treated as a vlan packet by host
2284                  */
2285                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2286                         rxcp->vlanf = 0;
2287
2288                 if (!lancer_chip(adapter))
2289                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2290
2291                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2292                     !test_bit(rxcp->vlan_tag, adapter->vids))
2293                         rxcp->vlanf = 0;
2294         }
2295
2296         /* As the compl has been parsed, reset it; we won't touch it again */
2297         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2298
2299         queue_tail_inc(&rxo->cq);
2300         return rxcp;
2301 }
2302
2303 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2304 {
2305         u32 order = get_order(size);
2306
2307         if (order > 0)
2308                 gfp |= __GFP_COMP;
2309         return  alloc_pages(gfp, order);
2310 }
2311
2312 /*
2313  * Allocate a page, split it into fragments of size rx_frag_size and post
2314  * them as receive buffers to BE
2315  */
2316 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2317 {
2318         struct be_adapter *adapter = rxo->adapter;
2319         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2320         struct be_queue_info *rxq = &rxo->q;
2321         struct page *pagep = NULL;
2322         struct device *dev = &adapter->pdev->dev;
2323         struct be_eth_rx_d *rxd;
2324         u64 page_dmaaddr = 0, frag_dmaaddr;
2325         u32 posted, page_offset = 0, notify = 0;
2326
2327         page_info = &rxo->page_info_tbl[rxq->head];
2328         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2329                 if (!pagep) {
2330                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2331                         if (unlikely(!pagep)) {
2332                                 rx_stats(rxo)->rx_post_fail++;
2333                                 break;
2334                         }
2335                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2336                                                     adapter->big_page_size,
2337                                                     DMA_FROM_DEVICE);
2338                         if (dma_mapping_error(dev, page_dmaaddr)) {
2339                                 put_page(pagep);
2340                                 pagep = NULL;
2341                                 adapter->drv_stats.dma_map_errors++;
2342                                 break;
2343                         }
2344                         page_offset = 0;
2345                 } else {
2346                         get_page(pagep);
2347                         page_offset += rx_frag_size;
2348                 }
2349                 page_info->page_offset = page_offset;
2350                 page_info->page = pagep;
2351
2352                 rxd = queue_head_node(rxq);
2353                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2354                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2355                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2356
2357                 /* Any space left in the current big page for another frag? */
2358                 if ((page_offset + rx_frag_size + rx_frag_size) >
2359                                         adapter->big_page_size) {
2360                         pagep = NULL;
2361                         page_info->last_frag = true;
2362                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2363                 } else {
2364                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2365                 }
2366
2367                 prev_page_info = page_info;
2368                 queue_head_inc(rxq);
2369                 page_info = &rxo->page_info_tbl[rxq->head];
2370         }
2371
2372         /* Mark the last frag of a page when we break out of the above loop
2373          * with no more slots available in the RXQ
2374          */
2375         if (pagep) {
2376                 prev_page_info->last_frag = true;
2377                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2378         }
2379
2380         if (posted) {
2381                 atomic_add(posted, &rxq->used);
2382                 if (rxo->rx_post_starved)
2383                         rxo->rx_post_starved = false;
2384                 do {
2385                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2386                         be_rxq_notify(adapter, rxq->id, notify);
2387                         posted -= notify;
2388                 } while (posted);
2389         } else if (atomic_read(&rxq->used) == 0) {
2390                 /* Let be_worker replenish when memory is available */
2391                 rxo->rx_post_starved = true;
2392         }
2393 }
2394
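/* Return the next valid TX completion from the TX CQ, or NULL if the CQ is
 * empty. The completion's valid bit is cleared so it is not seen twice.
 */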
2395 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2396 {
2397         struct be_queue_info *tx_cq = &txo->cq;
2398         struct be_tx_compl_info *txcp = &txo->txcp;
2399         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2400
2401         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2402                 return NULL;
2403
2404         /* Ensure load ordering of valid bit dword and other dwords below */
2405         rmb();
2406         be_dws_le_to_cpu(compl, sizeof(*compl));
2407
2408         txcp->status = GET_TX_COMPL_BITS(status, compl);
2409         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2410
2411         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2412         queue_tail_inc(tx_cq);
2413         return txcp;
2414 }
2415
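/* Unmap and free the WRBs (and skb) of the TX request ending at last_index.
 * Returns the number of WRBs processed so the caller can adjust txq->used.
 */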
2416 static u16 be_tx_compl_process(struct be_adapter *adapter,
2417                                struct be_tx_obj *txo, u16 last_index)
2418 {
2419         struct sk_buff **sent_skbs = txo->sent_skb_list;
2420         struct be_queue_info *txq = &txo->q;
2421         struct sk_buff *skb = NULL;
2422         bool unmap_skb_hdr = false;
2423         struct be_eth_wrb *wrb;
2424         u16 num_wrbs = 0;
2425         u32 frag_index;
2426
2427         do {
2428                 if (sent_skbs[txq->tail]) {
2429                         /* Free skb from prev req */
2430                         if (skb)
2431                                 dev_consume_skb_any(skb);
2432                         skb = sent_skbs[txq->tail];
2433                         sent_skbs[txq->tail] = NULL;
2434                         queue_tail_inc(txq);  /* skip hdr wrb */
2435                         num_wrbs++;
2436                         unmap_skb_hdr = true;
2437                 }
2438                 wrb = queue_tail_node(txq);
2439                 frag_index = txq->tail;
2440                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2441                               (unmap_skb_hdr && skb_headlen(skb)));
2442                 unmap_skb_hdr = false;
2443                 queue_tail_inc(txq);
2444                 num_wrbs++;
2445         } while (frag_index != last_index);
2446         dev_consume_skb_any(skb);
2447
2448         return num_wrbs;
2449 }
2450
2451 /* Return the number of events in the event queue */
2452 static inline int events_get(struct be_eq_obj *eqo)
2453 {
2454         struct be_eq_entry *eqe;
2455         int num = 0;
2456
2457         do {
2458                 eqe = queue_tail_node(&eqo->q);
2459                 if (eqe->evt == 0)
2460                         break;
2461
2462                 rmb();
2463                 eqe->evt = 0;
2464                 num++;
2465                 queue_tail_inc(&eqo->q);
2466         } while (true);
2467
2468         return num;
2469 }
2470
2471 /* Leaves the EQ in a disarmed state */
2472 static void be_eq_clean(struct be_eq_obj *eqo)
2473 {
2474         int num = events_get(eqo);
2475
2476         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2477 }
2478
2479 /* Free posted rx buffers that were not used */
2480 static void be_rxq_clean(struct be_rx_obj *rxo)
2481 {
2482         struct be_queue_info *rxq = &rxo->q;
2483         struct be_rx_page_info *page_info;
2484
2485         while (atomic_read(&rxq->used) > 0) {
2486                 page_info = get_rx_page_info(rxo);
2487                 put_page(page_info->page);
2488                 memset(page_info, 0, sizeof(*page_info));
2489         }
2490         BUG_ON(atomic_read(&rxq->used));
2491         rxq->tail = 0;
2492         rxq->head = 0;
2493 }
2494
2495 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2496 {
2497         struct be_queue_info *rx_cq = &rxo->cq;
2498         struct be_rx_compl_info *rxcp;
2499         struct be_adapter *adapter = rxo->adapter;
2500         int flush_wait = 0;
2501
2502         /* Consume pending rx completions.
2503          * Wait for the flush completion (identified by zero num_rcvd)
2504          * to arrive. Notify CQ even when there are no more CQ entries
2505          * for HW to flush partially coalesced CQ entries.
2506          * In Lancer, there is no need to wait for flush compl.
2507          */
2508         for (;;) {
2509                 rxcp = be_rx_compl_get(rxo);
2510                 if (!rxcp) {
2511                         if (lancer_chip(adapter))
2512                                 break;
2513
2514                         if (flush_wait++ > 50 ||
2515                             be_check_error(adapter,
2516                                            BE_ERROR_HW)) {
2517                                 dev_warn(&adapter->pdev->dev,
2518                                          "did not receive flush compl\n");
2519                                 break;
2520                         }
2521                         be_cq_notify(adapter, rx_cq->id, true, 0);
2522                         mdelay(1);
2523                 } else {
2524                         be_rx_compl_discard(rxo, rxcp);
2525                         be_cq_notify(adapter, rx_cq->id, false, 1);
2526                         if (rxcp->num_rcvd == 0)
2527                                 break;
2528                 }
2529         }
2530
2531         /* After cleanup, leave the CQ in unarmed state */
2532         be_cq_notify(adapter, rx_cq->id, false, 0);
2533 }
2534
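/* Drain TX completions during teardown. Requests that were enqueued but
 * never notified to HW are freed via the same completion logic and the TXQ
 * indices are rolled back to the last notified position.
 */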
2535 static void be_tx_compl_clean(struct be_adapter *adapter)
2536 {
2537         struct device *dev = &adapter->pdev->dev;
2538         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2539         struct be_tx_compl_info *txcp;
2540         struct be_queue_info *txq;
2541         u32 end_idx, notified_idx;
2542         struct be_tx_obj *txo;
2543         int i, pending_txqs;
2544
2545         /* Stop polling for compls when HW has been silent for 10ms */
2546         do {
2547                 pending_txqs = adapter->num_tx_qs;
2548
2549                 for_all_tx_queues(adapter, txo, i) {
2550                         cmpl = 0;
2551                         num_wrbs = 0;
2552                         txq = &txo->q;
2553                         while ((txcp = be_tx_compl_get(txo))) {
2554                                 num_wrbs +=
2555                                         be_tx_compl_process(adapter, txo,
2556                                                             txcp->end_index);
2557                                 cmpl++;
2558                         }
2559                         if (cmpl) {
2560                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2561                                 atomic_sub(num_wrbs, &txq->used);
2562                                 timeo = 0;
2563                         }
2564                         if (!be_is_tx_compl_pending(txo))
2565                                 pending_txqs--;
2566                 }
2567
2568                 if (pending_txqs == 0 || ++timeo > 10 ||
2569                     be_check_error(adapter, BE_ERROR_HW))
2570                         break;
2571
2572                 mdelay(1);
2573         } while (true);
2574
2575         /* Free enqueued TX that was never notified to HW */
2576         for_all_tx_queues(adapter, txo, i) {
2577                 txq = &txo->q;
2578
2579                 if (atomic_read(&txq->used)) {
2580                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2581                                  i, atomic_read(&txq->used));
2582                         notified_idx = txq->tail;
2583                         end_idx = txq->tail;
2584                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2585                                   txq->len);
2586                         /* Use the tx-compl process logic to handle requests
2587                          * that were not sent to the HW.
2588                          */
2589                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2590                         atomic_sub(num_wrbs, &txq->used);
2591                         BUG_ON(atomic_read(&txq->used));
2592                         txo->pend_wrb_cnt = 0;
2593                         /* Since hw was never notified of these requests,
2594                          * reset TXQ indices
2595                          */
2596                         txq->head = notified_idx;
2597                         txq->tail = notified_idx;
2598                 }
2599         }
2600 }
2601
2602 static void be_evt_queues_destroy(struct be_adapter *adapter)
2603 {
2604         struct be_eq_obj *eqo;
2605         int i;
2606
2607         for_all_evt_queues(adapter, eqo, i) {
2608                 if (eqo->q.created) {
2609                         be_eq_clean(eqo);
2610                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2611                         napi_hash_del(&eqo->napi);
2612                         netif_napi_del(&eqo->napi);
2613                         free_cpumask_var(eqo->affinity_mask);
2614                 }
2615                 be_queue_free(adapter, &eqo->q);
2616         }
2617 }
2618
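/* Create one EQ per interrupt vector needed to cover the configured RX and
 * TX queues; each EQ gets a NAPI context and a CPU affinity hint spread
 * across the device's NUMA node.
 */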
2619 static int be_evt_queues_create(struct be_adapter *adapter)
2620 {
2621         struct be_queue_info *eq;
2622         struct be_eq_obj *eqo;
2623         struct be_aic_obj *aic;
2624         int i, rc;
2625
2626         /* need enough EQs to service both RX and TX queues */
2627         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2628                                     max(adapter->cfg_num_rx_irqs,
2629                                         adapter->cfg_num_tx_irqs));
2630
2631         for_all_evt_queues(adapter, eqo, i) {
2632                 int numa_node = dev_to_node(&adapter->pdev->dev);
2633
2634                 aic = &adapter->aic_obj[i];
2635                 eqo->adapter = adapter;
2636                 eqo->idx = i;
2637                 aic->max_eqd = BE_MAX_EQD;
2638                 aic->enable = true;
2639
2640                 eq = &eqo->q;
2641                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2642                                     sizeof(struct be_eq_entry));
2643                 if (rc)
2644                         return rc;
2645
2646                 rc = be_cmd_eq_create(adapter, eqo);
2647                 if (rc)
2648                         return rc;
2649
2650                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2651                         return -ENOMEM;
2652                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2653                                 eqo->affinity_mask);
2654                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2655                                BE_NAPI_WEIGHT);
2656         }
2657         return 0;
2658 }
2659
2660 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2661 {
2662         struct be_queue_info *q;
2663
2664         q = &adapter->mcc_obj.q;
2665         if (q->created)
2666                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2667         be_queue_free(adapter, q);
2668
2669         q = &adapter->mcc_obj.cq;
2670         if (q->created)
2671                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2672         be_queue_free(adapter, q);
2673 }
2674
2675 /* Must be called only after TX qs are created as MCC shares TX EQ */
2676 static int be_mcc_queues_create(struct be_adapter *adapter)
2677 {
2678         struct be_queue_info *q, *cq;
2679
2680         cq = &adapter->mcc_obj.cq;
2681         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2682                            sizeof(struct be_mcc_compl)))
2683                 goto err;
2684
2685         /* Use the default EQ for MCC completions */
2686         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2687                 goto mcc_cq_free;
2688
2689         q = &adapter->mcc_obj.q;
2690         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2691                 goto mcc_cq_destroy;
2692
2693         if (be_cmd_mccq_create(adapter, q, cq))
2694                 goto mcc_q_free;
2695
2696         return 0;
2697
2698 mcc_q_free:
2699         be_queue_free(adapter, q);
2700 mcc_cq_destroy:
2701         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2702 mcc_cq_free:
2703         be_queue_free(adapter, cq);
2704 err:
2705         return -1;
2706 }
2707
2708 static void be_tx_queues_destroy(struct be_adapter *adapter)
2709 {
2710         struct be_queue_info *q;
2711         struct be_tx_obj *txo;
2712         u8 i;
2713
2714         for_all_tx_queues(adapter, txo, i) {
2715                 q = &txo->q;
2716                 if (q->created)
2717                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2718                 be_queue_free(adapter, q);
2719
2720                 q = &txo->cq;
2721                 if (q->created)
2722                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2723                 be_queue_free(adapter, q);
2724         }
2725 }
2726
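/* Create a TX CQ and TX queue per configured TX IRQ. When there are fewer
 * EQs than TX queues, the TX CQs are distributed round-robin across the EQs
 * and the XPS mapping follows each EQ's affinity mask.
 */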
2727 static int be_tx_qs_create(struct be_adapter *adapter)
2728 {
2729         struct be_queue_info *cq;
2730         struct be_tx_obj *txo;
2731         struct be_eq_obj *eqo;
2732         int status, i;
2733
2734         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2735
2736         for_all_tx_queues(adapter, txo, i) {
2737                 cq = &txo->cq;
2738                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2739                                         sizeof(struct be_eth_tx_compl));
2740                 if (status)
2741                         return status;
2742
2743                 u64_stats_init(&txo->stats.sync);
2744                 u64_stats_init(&txo->stats.sync_compl);
2745
2746                 /* If num_evt_qs is less than num_tx_qs, then more than
2747                  * one txq shares an eq
2748                  */
2749                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2750                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2751                 if (status)
2752                         return status;
2753
2754                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2755                                         sizeof(struct be_eth_wrb));
2756                 if (status)
2757                         return status;
2758
2759                 status = be_cmd_txq_create(adapter, txo);
2760                 if (status)
2761                         return status;
2762
2763                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2764                                     eqo->idx);
2765         }
2766
2767         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2768                  adapter->num_tx_qs);
2769         return 0;
2770 }
2771
2772 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2773 {
2774         struct be_queue_info *q;
2775         struct be_rx_obj *rxo;
2776         int i;
2777
2778         for_all_rx_queues(adapter, rxo, i) {
2779                 q = &rxo->cq;
2780                 if (q->created)
2781                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2782                 be_queue_free(adapter, q);
2783         }
2784 }
2785
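/* Size the RX queue set (RSS rings plus an optional default RXQ) and create
 * an RX completion queue for each, spread across the available EQs.
 */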
2786 static int be_rx_cqs_create(struct be_adapter *adapter)
2787 {
2788         struct be_queue_info *eq, *cq;
2789         struct be_rx_obj *rxo;
2790         int rc, i;
2791
2792         adapter->num_rss_qs =
2793                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2794
2795         /* We'll use RSS only if at least 2 RSS rings are supported. */
2796         if (adapter->num_rss_qs < 2)
2797                 adapter->num_rss_qs = 0;
2798
2799         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2800
2801         /* When the interface is not capable of RSS rings (and there is no
2802          * need to create a default RXQ) we'll still need one RXQ
2803          */
2804         if (adapter->num_rx_qs == 0)
2805                 adapter->num_rx_qs = 1;
2806
2807         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2808         for_all_rx_queues(adapter, rxo, i) {
2809                 rxo->adapter = adapter;
2810                 cq = &rxo->cq;
2811                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2812                                     sizeof(struct be_eth_rx_compl));
2813                 if (rc)
2814                         return rc;
2815
2816                 u64_stats_init(&rxo->stats.sync);
2817                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
2818                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
2819                 if (rc)
2820                         return rc;
2821         }
2822
2823         dev_info(&adapter->pdev->dev,
2824                  "created %d RX queue(s)\n", adapter->num_rx_qs);
2825         return 0;
2826 }
2827
2828 static irqreturn_t be_intx(int irq, void *dev)
2829 {
2830         struct be_eq_obj *eqo = dev;
2831         struct be_adapter *adapter = eqo->adapter;
2832         int num_evts = 0;
2833
2834         /* IRQ is not expected when NAPI is scheduled as the EQ
2835          * will not be armed.
2836          * But, this can happen on Lancer INTx where it takes
2837          * a while to de-assert INTx or in BE2 where occasionally
2838          * an interrupt may be raised even when EQ is unarmed.
2839          * If NAPI is already scheduled, then counting & notifying
2840          * events will orphan them.
2841          */
2842         if (napi_schedule_prep(&eqo->napi)) {
2843                 num_evts = events_get(eqo);
2844                 __napi_schedule(&eqo->napi);
2845                 if (num_evts)
2846                         eqo->spurious_intr = 0;
2847         }
2848         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
2849
2850         /* Return IRQ_HANDLED only for the first spurious intr
2851          * after a valid intr to stop the kernel from branding
2852          * this irq as a bad one!
2853          */
2854         if (num_evts || eqo->spurious_intr++ == 0)
2855                 return IRQ_HANDLED;
2856         else
2857                 return IRQ_NONE;
2858 }
2859
2860 static irqreturn_t be_msix(int irq, void *dev)
2861 {
2862         struct be_eq_obj *eqo = dev;
2863
2864         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
2865         napi_schedule(&eqo->napi);
2866         return IRQ_HANDLED;
2867 }
2868
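/* GRO is attempted only for error-free TCP completions whose L4 checksum was
 * validated by HW; all other frames take the regular receive path.
 */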
2869 static inline bool do_gro(struct be_rx_compl_info *rxcp)
2870 {
2871         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
2872 }
2873
2874 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
2875                          int budget, int polling)
2876 {
2877         struct be_adapter *adapter = rxo->adapter;
2878         struct be_queue_info *rx_cq = &rxo->cq;
2879         struct be_rx_compl_info *rxcp;
2880         u32 work_done;
2881         u32 frags_consumed = 0;
2882
2883         for (work_done = 0; work_done < budget; work_done++) {
2884                 rxcp = be_rx_compl_get(rxo);
2885                 if (!rxcp)
2886                         break;
2887
2888                 /* Is it a flush compl that has no data */
2889                 if (unlikely(rxcp->num_rcvd == 0))
2890                         goto loop_continue;
2891
2892                 /* Discard compl with partial DMA Lancer B0 */
2893                 if (unlikely(!rxcp->pkt_size)) {
2894                         be_rx_compl_discard(rxo, rxcp);
2895                         goto loop_continue;
2896                 }
2897
2898                 /* On BE drop pkts that arrive due to imperfect filtering in
2899                  * promiscuous mode on some SKUs
2900                  */
2901                 if (unlikely(rxcp->port != adapter->port_num &&
2902                              !lancer_chip(adapter))) {
2903                         be_rx_compl_discard(rxo, rxcp);
2904                         goto loop_continue;
2905                 }
2906
2907                 /* Don't do gro when we're busy_polling */
2908                 if (do_gro(rxcp) && polling != BUSY_POLLING)
2909                         be_rx_compl_process_gro(rxo, napi, rxcp);
2910                 else
2911                         be_rx_compl_process(rxo, napi, rxcp);
2912
2913 loop_continue:
2914                 frags_consumed += rxcp->num_rcvd;
2915                 be_rx_stats_update(rxo, rxcp);
2916         }
2917
2918         if (work_done) {
2919                 be_cq_notify(adapter, rx_cq->id, true, work_done);
2920
2921                 /* When an rx-obj gets into post_starved state, just
2922                  * let be_worker do the posting.
2923                  */
2924                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
2925                     !rxo->rx_post_starved)
2926                         be_post_rx_frags(rxo, GFP_ATOMIC,
2927                                          max_t(u32, MAX_RX_POST,
2928                                                frags_consumed));
2929         }
2930
2931         return work_done;
2932 }
2933
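/* Map a TX completion error status (BE2/BE3/Skyhawk path) to the matching
 * per-TXQ driver error counter.
 */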
2934 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2935 {
2936         switch (status) {
2937         case BE_TX_COMP_HDR_PARSE_ERR:
2938                 tx_stats(txo)->tx_hdr_parse_err++;
2939                 break;
2940         case BE_TX_COMP_NDMA_ERR:
2941                 tx_stats(txo)->tx_dma_err++;
2942                 break;
2943         case BE_TX_COMP_ACL_ERR:
2944                 tx_stats(txo)->tx_spoof_check_err++;
2945                 break;
2946         }
2947 }
2948
2949 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2950 {
2951         switch (status) {
2952         case LANCER_TX_COMP_LSO_ERR:
2953                 tx_stats(txo)->tx_tso_err++;
2954                 break;
2955         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2956         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2957                 tx_stats(txo)->tx_spoof_check_err++;
2958                 break;
2959         case LANCER_TX_COMP_QINQ_ERR:
2960                 tx_stats(txo)->tx_qinq_err++;
2961                 break;
2962         case LANCER_TX_COMP_PARITY_ERR:
2963                 tx_stats(txo)->tx_internal_parity_err++;
2964                 break;
2965         case LANCER_TX_COMP_DMA_ERR:
2966                 tx_stats(txo)->tx_dma_err++;
2967                 break;
2968         }
2969 }
2970
2971 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
2972                           int idx)
2973 {
2974         int num_wrbs = 0, work_done = 0;
2975         struct be_tx_compl_info *txcp;
2976
2977         while ((txcp = be_tx_compl_get(txo))) {
2978                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
2979                 work_done++;
2980
2981                 if (txcp->status) {
2982                         if (lancer_chip(adapter))
2983                                 lancer_update_tx_err(txo, txcp->status);
2984                         else
2985                                 be_update_tx_err(txo, txcp->status);
2986                 }
2987         }
2988
2989         if (work_done) {
2990                 be_cq_notify(adapter, txo->cq.id, true, work_done);
2991                 atomic_sub(num_wrbs, &txo->q.used);
2992
2993                 /* As Tx wrbs have been freed up, wake up netdev queue
2994                  * if it was stopped due to lack of tx wrbs.  */
2995                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
2996                     be_can_txq_wake(txo)) {
2997                         netif_wake_subqueue(adapter->netdev, idx);
2998                 }
2999
3000                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3001                 tx_stats(txo)->tx_compl += work_done;
3002                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3003         }
3004 }
3005
3006 #ifdef CONFIG_NET_RX_BUSY_POLL
3007 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3008 {
3009         bool status = true;
3010
3011         spin_lock(&eqo->lock); /* BH is already disabled */
3012         if (eqo->state & BE_EQ_LOCKED) {
3013                 WARN_ON(eqo->state & BE_EQ_NAPI);
3014                 eqo->state |= BE_EQ_NAPI_YIELD;
3015                 status = false;
3016         } else {
3017                 eqo->state = BE_EQ_NAPI;
3018         }
3019         spin_unlock(&eqo->lock);
3020         return status;
3021 }
3022
3023 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3024 {
3025         spin_lock(&eqo->lock); /* BH is already disabled */
3026
3027         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3028         eqo->state = BE_EQ_IDLE;
3029
3030         spin_unlock(&eqo->lock);
3031 }
3032
3033 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3034 {
3035         bool status = true;
3036
3037         spin_lock_bh(&eqo->lock);
3038         if (eqo->state & BE_EQ_LOCKED) {
3039                 eqo->state |= BE_EQ_POLL_YIELD;
3040                 status = false;
3041         } else {
3042                 eqo->state |= BE_EQ_POLL;
3043         }
3044         spin_unlock_bh(&eqo->lock);
3045         return status;
3046 }
3047
3048 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3049 {
3050         spin_lock_bh(&eqo->lock);
3051
3052         WARN_ON(eqo->state & (BE_EQ_NAPI));
3053         eqo->state = BE_EQ_IDLE;
3054
3055         spin_unlock_bh(&eqo->lock);
3056 }
3057
3058 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3059 {
3060         spin_lock_init(&eqo->lock);
3061         eqo->state = BE_EQ_IDLE;
3062 }
3063
3064 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3065 {
3066         local_bh_disable();
3067
3068         /* It's enough to just acquire napi lock on the eqo to stop
3069          * be_busy_poll() from processing any queues.
3070          */
3071         while (!be_lock_napi(eqo))
3072                 mdelay(1);
3073
3074         local_bh_enable();
3075 }
3076
3077 #else /* CONFIG_NET_RX_BUSY_POLL */
3078
3079 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3080 {
3081         return true;
3082 }
3083
3084 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3085 {
3086 }
3087
3088 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3089 {
3090         return false;
3091 }
3092
3093 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3094 {
3095 }
3096
3097 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3098 {
3099 }
3100
3101 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3102 {
3103 }
3104 #endif /* CONFIG_NET_RX_BUSY_POLL */
3105
3106 int be_poll(struct napi_struct *napi, int budget)
3107 {
3108         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3109         struct be_adapter *adapter = eqo->adapter;
3110         int max_work = 0, work, i, num_evts;
3111         struct be_rx_obj *rxo;
3112         struct be_tx_obj *txo;
3113         u32 mult_enc = 0;
3114
3115         num_evts = events_get(eqo);
3116
3117         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3118                 be_process_tx(adapter, txo, i);
3119
3120         if (be_lock_napi(eqo)) {
3121                 /* This loop will iterate twice for EQ0 in which
3122                  * completions of the last RXQ (default one) are also processed.
3123                  * For other EQs the loop iterates only once.
3124                  */
3125                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3126                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3127                         max_work = max(work, max_work);
3128                 }
3129                 be_unlock_napi(eqo);
3130         } else {
3131                 max_work = budget;
3132         }
3133
3134         if (is_mcc_eqo(eqo))
3135                 be_process_mcc(adapter);
3136
3137         if (max_work < budget) {
3138                 napi_complete(napi);
3139
3140                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3141                  * delay via a delay multiplier encoding value
3142                  */
3143                 if (skyhawk_chip(adapter))
3144                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3145
3146                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3147                              mult_enc);
3148         } else {
3149                 /* As we'll continue in polling mode, count and clear events */
3150                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3151         }
3152         return max_work;
3153 }
3154
3155 #ifdef CONFIG_NET_RX_BUSY_POLL
3156 static int be_busy_poll(struct napi_struct *napi)
3157 {
3158         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3159         struct be_adapter *adapter = eqo->adapter;
3160         struct be_rx_obj *rxo;
3161         int i, work = 0;
3162
3163         if (!be_lock_busy_poll(eqo))
3164                 return LL_FLUSH_BUSY;
3165
3166         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3167                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3168                 if (work)
3169                         break;
3170         }
3171
3172         be_unlock_busy_poll(eqo);
3173         return work;
3174 }
3175 #endif
3176
3177 void be_detect_error(struct be_adapter *adapter)
3178 {
3179         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3180         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3181         u32 i;
3182         struct device *dev = &adapter->pdev->dev;
3183
3184         if (be_check_error(adapter, BE_ERROR_HW))
3185                 return;
3186
3187         if (lancer_chip(adapter)) {
3188                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3189                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3190                         be_set_error(adapter, BE_ERROR_UE);
3191                         sliport_err1 = ioread32(adapter->db +
3192                                                 SLIPORT_ERROR1_OFFSET);
3193                         sliport_err2 = ioread32(adapter->db +
3194                                                 SLIPORT_ERROR2_OFFSET);
3195                         /* Do not log error messages if it's a FW reset */
3196                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3197                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3198                                 dev_info(dev, "Firmware update in progress\n");
3199                         } else {
3200                                 dev_err(dev, "Error detected in the card\n");
3201                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3202                                         sliport_status);
3203                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3204                                         sliport_err1);
3205                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3206                                         sliport_err2);
3207                         }
3208                 }
3209         } else {
3210                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3211                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3212                 ue_lo_mask = ioread32(adapter->pcicfg +
3213                                       PCICFG_UE_STATUS_LOW_MASK);
3214                 ue_hi_mask = ioread32(adapter->pcicfg +
3215                                       PCICFG_UE_STATUS_HI_MASK);
3216
3217                 ue_lo = (ue_lo & ~ue_lo_mask);
3218                 ue_hi = (ue_hi & ~ue_hi_mask);
3219
3220                 /* On certain platforms BE hardware can indicate spurious UEs.
3221                  * Allow HW to stop working completely in case of a real UE.
3222                  * Hence not setting the hw_error for UE detection.
3223                  */
3224
3225                 if (ue_lo || ue_hi) {
3226                         dev_err(dev,
3227                                 "Unrecoverable Error detected in the adapter\n");
3228                         dev_err(dev, "Please reboot server to recover\n");
3229                         if (skyhawk_chip(adapter))
3230                                 be_set_error(adapter, BE_ERROR_UE);
3231
3232                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3233                                 if (ue_lo & 1)
3234                                         dev_err(dev, "UE: %s bit set\n",
3235                                                 ue_status_low_desc[i]);
3236                         }
3237                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3238                                 if (ue_hi & 1)
3239                                         dev_err(dev, "UE: %s bit set\n",
3240                                                 ue_status_hi_desc[i]);
3241                         }
3242                 }
3243         }
3244 }
3245
3246 static void be_msix_disable(struct be_adapter *adapter)
3247 {
3248         if (msix_enabled(adapter)) {
3249                 pci_disable_msix(adapter->pdev);
3250                 adapter->num_msix_vec = 0;
3251                 adapter->num_msix_roce_vec = 0;
3252         }
3253 }
3254
3255 static int be_msix_enable(struct be_adapter *adapter)
3256 {
3257         unsigned int i, max_roce_eqs;
3258         struct device *dev = &adapter->pdev->dev;
3259         int num_vec;
3260
3261         /* If RoCE is supported, program the max number of vectors that
3262          * could be used for NIC and RoCE, else, just program the number
3263          * we'll use initially.
3264          */
3265         if (be_roce_supported(adapter)) {
3266                 max_roce_eqs =
3267                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3268                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3269                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3270         } else {
3271                 num_vec = max(adapter->cfg_num_rx_irqs,
3272                               adapter->cfg_num_tx_irqs);
3273         }
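        /* Illustrative example (numbers not from the source): on a RoCE-capable
         * function with 32 max function EQs, 16 max NIC EQs and 8 online CPUs,
         * min(32 - 16, 8) = 8 extra vectors are requested on top of
         * be_max_any_irqs().
         */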
3274
3275         for (i = 0; i < num_vec; i++)
3276                 adapter->msix_entries[i].entry = i;
3277
3278         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3279                                         MIN_MSIX_VECTORS, num_vec);
3280         if (num_vec < 0)
3281                 goto fail;
3282
3283         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3284                 adapter->num_msix_roce_vec = num_vec / 2;
3285                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3286                          adapter->num_msix_roce_vec);
3287         }
3288
3289         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3290
3291         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3292                  adapter->num_msix_vec);
3293         return 0;
3294
3295 fail:
3296         dev_warn(dev, "MSIx enable failed\n");
3297
3298         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3299         if (be_virtfn(adapter))
3300                 return num_vec;
3301         return 0;
3302 }
3303
3304 static inline int be_msix_vec_get(struct be_adapter *adapter,
3305                                   struct be_eq_obj *eqo)
3306 {
3307         return adapter->msix_entries[eqo->msix_idx].vector;
3308 }
3309
3310 static int be_msix_register(struct be_adapter *adapter)
3311 {
3312         struct net_device *netdev = adapter->netdev;
3313         struct be_eq_obj *eqo;
3314         int status, i, vec;
3315
3316         for_all_evt_queues(adapter, eqo, i) {
3317                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3318                 vec = be_msix_vec_get(adapter, eqo);
3319                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3320                 if (status)
3321                         goto err_msix;
3322
3323                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3324         }
3325
3326         return 0;
3327 err_msix:
3328         for (i--; i >= 0; i--) {
3329                 eqo = &adapter->eq_obj[i];
3330                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3331         }
3332         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3333                  status);
3334         be_msix_disable(adapter);
3335         return status;
3336 }
3337
3338 static int be_irq_register(struct be_adapter *adapter)
3339 {
3340         struct net_device *netdev = adapter->netdev;
3341         int status;
3342
3343         if (msix_enabled(adapter)) {
3344                 status = be_msix_register(adapter);
3345                 if (status == 0)
3346                         goto done;
3347                 /* INTx is not supported for VF */
3348                 if (be_virtfn(adapter))
3349                         return status;
3350         }
3351
3352         /* INTx: only the first EQ is used */
3353         netdev->irq = adapter->pdev->irq;
3354         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3355                              &adapter->eq_obj[0]);
3356         if (status) {
3357                 dev_err(&adapter->pdev->dev,
3358                         "INTx request IRQ failed - err %d\n", status);
3359                 return status;
3360         }
3361 done:
3362         adapter->isr_registered = true;
3363         return 0;
3364 }
3365
3366 static void be_irq_unregister(struct be_adapter *adapter)
3367 {
3368         struct net_device *netdev = adapter->netdev;
3369         struct be_eq_obj *eqo;
3370         int i, vec;
3371
3372         if (!adapter->isr_registered)
3373                 return;
3374
3375         /* INTx */
3376         if (!msix_enabled(adapter)) {
3377                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3378                 goto done;
3379         }
3380
3381         /* MSIx */
3382         for_all_evt_queues(adapter, eqo, i) {
3383                 vec = be_msix_vec_get(adapter, eqo);
3384                 irq_set_affinity_hint(vec, NULL);
3385                 free_irq(vec, eqo);
3386         }
3387
3388 done:
3389         adapter->isr_registered = false;
3390 }
3391
3392 static void be_rx_qs_destroy(struct be_adapter *adapter)
3393 {
3394         struct rss_info *rss = &adapter->rss_info;
3395         struct be_queue_info *q;
3396         struct be_rx_obj *rxo;
3397         int i;
3398
3399         for_all_rx_queues(adapter, rxo, i) {
3400                 q = &rxo->q;
3401                 if (q->created) {
3402                         /* If RXQs are destroyed while in an "out of buffer"
3403                          * state, there is a possibility of an HW stall on
3404                          * Lancer. So, post 64 buffers to each queue to relieve
3405                          * the "out of buffer" condition.
3406                          * Make sure there's space in the RXQ before posting.
3407                          */
3408                         if (lancer_chip(adapter)) {
3409                                 be_rx_cq_clean(rxo);
3410                                 if (atomic_read(&q->used) == 0)
3411                                         be_post_rx_frags(rxo, GFP_KERNEL,
3412                                                          MAX_RX_POST);
3413                         }
3414
3415                         be_cmd_rxq_destroy(adapter, q);
3416                         be_rx_cq_clean(rxo);
3417                         be_rxq_clean(rxo);
3418                 }
3419                 be_queue_free(adapter, q);
3420         }
3421
3422         if (rss->rss_flags) {
3423                 rss->rss_flags = RSS_ENABLE_NONE;
3424                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3425                                   128, rss->rss_hkey);
3426         }
3427 }
3428
3429 static void be_disable_if_filters(struct be_adapter *adapter)
3430 {
3431         be_cmd_pmac_del(adapter, adapter->if_handle,
3432                         adapter->pmac_id[0], 0);
3433
3434         be_clear_uc_list(adapter);
3435
3436         /* The IFACE flags are enabled in the open path and cleared
3437          * in the close path. When a VF gets detached from the host and
3438          * assigned to a VM the following happens:
3439          *      - VF's IFACE flags get cleared in the detach path
3440          *      - IFACE create is issued by the VF in the attach path
3441          * Due to a bug in the BE3/Skyhawk-R FW
3442          * (Lancer FW doesn't have the bug), the IFACE capability flags
3443          * specified along with the IFACE create cmd issued by a VF are not
3444          * honoured by FW.  As a consequence, if a *new* driver
3445          * (that enables/disables IFACE flags in open/close)
3446          * is loaded in the host and an *old* driver is used by a VM/VF,
3447          * the IFACE gets created *without* the needed flags.
3448          * To avoid this, disable RX-filter flags only for Lancer.
3449          */
3450         if (lancer_chip(adapter)) {
3451                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3452                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3453         }
3454 }
3455
3456 static int be_close(struct net_device *netdev)
3457 {
3458         struct be_adapter *adapter = netdev_priv(netdev);
3459         struct be_eq_obj *eqo;
3460         int i;
3461
3462         /* This protection is needed as be_close() may be called even when the
3463          * adapter is in a cleared state (after EEH perm failure)
3464          */
3465         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3466                 return 0;
3467
3468         be_disable_if_filters(adapter);
3469
3470         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3471                 for_all_evt_queues(adapter, eqo, i) {
3472                         napi_disable(&eqo->napi);
3473                         be_disable_busy_poll(eqo);
3474                 }
3475                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3476         }
3477
3478         be_async_mcc_disable(adapter);
3479
3480         /* Wait for all pending tx completions to arrive so that
3481          * all tx skbs are freed.
3482          */
3483         netif_tx_disable(netdev);
3484         be_tx_compl_clean(adapter);
3485
3486         be_rx_qs_destroy(adapter);
3487
3488         for_all_evt_queues(adapter, eqo, i) {
3489                 if (msix_enabled(adapter))
3490                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3491                 else
3492                         synchronize_irq(netdev->irq);
3493                 be_eq_clean(eqo);
3494         }
3495
3496         be_irq_unregister(adapter);
3497
3498         return 0;
3499 }
3500
3501 static int be_rx_qs_create(struct be_adapter *adapter)
3502 {
3503         struct rss_info *rss = &adapter->rss_info;
3504         u8 rss_key[RSS_HASH_KEY_LEN];
3505         struct be_rx_obj *rxo;
3506         int rc, i, j;
3507
3508         for_all_rx_queues(adapter, rxo, i) {
3509                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3510                                     sizeof(struct be_eth_rx_d));
3511                 if (rc)
3512                         return rc;
3513         }
3514
3515         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3516                 rxo = default_rxo(adapter);
3517                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3518                                        rx_frag_size, adapter->if_handle,
3519                                        false, &rxo->rss_id);
3520                 if (rc)
3521                         return rc;
3522         }
3523
3524         for_all_rss_queues(adapter, rxo, i) {
3525                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3526                                        rx_frag_size, adapter->if_handle,
3527                                        true, &rxo->rss_id);
3528                 if (rc)
3529                         return rc;
3530         }
3531
3532         if (be_multi_rxq(adapter)) {
3533                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3534                         for_all_rss_queues(adapter, rxo, i) {
3535                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3536                                         break;
3537                                 rss->rsstable[j + i] = rxo->rss_id;
3538                                 rss->rss_queue[j + i] = i;
3539                         }
3540                 }
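                /* Illustrative example: with 2 RSS rings the indirection table
                 * filled above ends up as {q0, q1, q0, q1, ...} across all
                 * RSS_INDIR_TABLE_LEN entries, i.e. a round-robin spread.
                 */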
3541                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3542                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3543
3544                 if (!BEx_chip(adapter))
3545                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3546                                 RSS_ENABLE_UDP_IPV6;
3547
3548                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3549                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3550                                        RSS_INDIR_TABLE_LEN, rss_key);
3551                 if (rc) {
3552                         rss->rss_flags = RSS_ENABLE_NONE;
3553                         return rc;
3554                 }
3555
3556                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3557         } else {
3558                 /* Disable RSS if only the default RX Q is created */
3559                 rss->rss_flags = RSS_ENABLE_NONE;
3560         }
3561
3562
3563         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3564          * which is a queue empty condition
3565          */
3566         for_all_rx_queues(adapter, rxo, i)
3567                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3568
3569         return 0;
3570 }
3571
3572 static int be_enable_if_filters(struct be_adapter *adapter)
3573 {
3574         int status;
3575
3576         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3577         if (status)
3578                 return status;
3579
3580         /* For BE3 VFs, the PF programs the initial MAC address */
3581         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3582                 status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
3583                                          adapter->if_handle,
3584                                          &adapter->pmac_id[0], 0);
3585                 if (status)
3586                         return status;
3587         }
3588
3589         if (adapter->vlans_added)
3590                 be_vid_config(adapter);
3591
3592         be_set_rx_mode(adapter->netdev);
3593
3594         return 0;
3595 }
3596
3597 static int be_open(struct net_device *netdev)
3598 {
3599         struct be_adapter *adapter = netdev_priv(netdev);
3600         struct be_eq_obj *eqo;
3601         struct be_rx_obj *rxo;
3602         struct be_tx_obj *txo;
3603         u8 link_status;
3604         int status, i;
3605
3606         status = be_rx_qs_create(adapter);
3607         if (status)
3608                 goto err;
3609
3610         status = be_enable_if_filters(adapter);
3611         if (status)
3612                 goto err;
3613
3614         status = be_irq_register(adapter);
3615         if (status)
3616                 goto err;
3617
3618         for_all_rx_queues(adapter, rxo, i)
3619                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3620
3621         for_all_tx_queues(adapter, txo, i)
3622                 be_cq_notify(adapter, txo->cq.id, true, 0);
3623
3624         be_async_mcc_enable(adapter);
3625
3626         for_all_evt_queues(adapter, eqo, i) {
3627                 napi_enable(&eqo->napi);
3628                 be_enable_busy_poll(eqo);
3629                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3630         }
3631         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3632
3633         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3634         if (!status)
3635                 be_link_status_update(adapter, link_status);
3636
3637         netif_tx_start_all_queues(netdev);
3638         if (skyhawk_chip(adapter))
3639                 udp_tunnel_get_rx_info(netdev);
3640
3641         return 0;
3642 err:
3643         be_close(adapter->netdev);
3644         return -EIO;
3645 }
3646
3647 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3648 {
3649         u32 addr;
3650
3651         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3652
3653         mac[5] = (u8)(addr & 0xFF);
3654         mac[4] = (u8)((addr >> 8) & 0xFF);
3655         mac[3] = (u8)((addr >> 16) & 0xFF);
3656         /* Use the OUI from the current MAC address */
3657         memcpy(mac, adapter->netdev->dev_addr, 3);
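        /* Illustrative example (values not from the source): if dev_addr is
         * 00:00:c9:ab:cd:ef and jhash() returns 0x12345678, the seed MAC
         * generated above is 00:00:c9:34:56:78.
         */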
3658 }
3659
3660 /*
3661  * Generate a seed MAC address from the PF MAC Address using jhash.
3662  * MAC addresses for VFs are assigned incrementally starting from the seed.
3663  * These addresses are programmed in the ASIC by the PF and the VF driver
3664  * queries for the MAC address during its probe.
3665  */
3666 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3667 {
3668         u32 vf;
3669         int status = 0;
3670         u8 mac[ETH_ALEN];
3671         struct be_vf_cfg *vf_cfg;
3672
3673         be_vf_eth_addr_generate(adapter, mac);
3674
3675         for_all_vfs(adapter, vf_cfg, vf) {
3676                 if (BEx_chip(adapter))
3677                         status = be_cmd_pmac_add(adapter, mac,
3678                                                  vf_cfg->if_handle,
3679                                                  &vf_cfg->pmac_id, vf + 1);
3680                 else
3681                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3682                                                 vf + 1);
3683
3684                 if (status)
3685                         dev_err(&adapter->pdev->dev,
3686                                 "Mac address assignment failed for VF %d\n",
3687                                 vf);
3688                 else
3689                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3690
3691                 mac[5] += 1;
3692         }
3693         return status;
3694 }
3695
3696 static int be_vfs_mac_query(struct be_adapter *adapter)
3697 {
3698         int status, vf;
3699         u8 mac[ETH_ALEN];
3700         struct be_vf_cfg *vf_cfg;
3701
3702         for_all_vfs(adapter, vf_cfg, vf) {
3703                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3704                                                mac, vf_cfg->if_handle,
3705                                                false, vf+1);
3706                 if (status)
3707                         return status;
3708                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3709         }
3710         return 0;
3711 }
3712
3713 static void be_vf_clear(struct be_adapter *adapter)
3714 {
3715         struct be_vf_cfg *vf_cfg;
3716         u32 vf;
3717
3718         if (pci_vfs_assigned(adapter->pdev)) {
3719                 dev_warn(&adapter->pdev->dev,
3720                          "VFs are assigned to VMs: not disabling VFs\n");
3721                 goto done;
3722         }
3723
3724         pci_disable_sriov(adapter->pdev);
3725
3726         for_all_vfs(adapter, vf_cfg, vf) {
3727                 if (BEx_chip(adapter))
3728                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3729                                         vf_cfg->pmac_id, vf + 1);
3730                 else
3731                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3732                                        vf + 1);
3733
3734                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3735         }
3736
3737         if (BE3_chip(adapter))
3738                 be_cmd_set_hsw_config(adapter, 0, 0,
3739                                       adapter->if_handle,
3740                                       PORT_FWD_TYPE_PASSTHRU, 0);
3741 done:
3742         kfree(adapter->vf_cfg);
3743         adapter->num_vfs = 0;
3744         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3745 }
3746
3747 static void be_clear_queues(struct be_adapter *adapter)
3748 {
3749         be_mcc_queues_destroy(adapter);
3750         be_rx_cqs_destroy(adapter);
3751         be_tx_queues_destroy(adapter);
3752         be_evt_queues_destroy(adapter);
3753 }
3754
3755 static void be_cancel_worker(struct be_adapter *adapter)
3756 {
3757         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3758                 cancel_delayed_work_sync(&adapter->work);
3759                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3760         }
3761 }
3762
3763 static void be_cancel_err_detection(struct be_adapter *adapter)
3764 {
3765         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3766                 cancel_delayed_work_sync(&adapter->be_err_detection_work);
3767                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3768         }
3769 }
3770
3771 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3772 {
3773         struct net_device *netdev = adapter->netdev;
3774
3775         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3776                 be_cmd_manage_iface(adapter, adapter->if_handle,
3777                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3778
3779         if (adapter->vxlan_port)
3780                 be_cmd_set_vxlan_port(adapter, 0);
3781
3782         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3783         adapter->vxlan_port = 0;
3784
3785         netdev->hw_enc_features = 0;
3786         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3787         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3788 }
3789
3790 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3791                                 struct be_resources *vft_res)
3792 {
3793         struct be_resources res = adapter->pool_res;
3794         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3795         struct be_resources res_mod = {0};
3796         u16 num_vf_qs = 1;
3797
3798         /* Distribute the queue resources among the PF and its VFs */
3799         if (num_vfs) {
3800                 /* Divide the rx queues evenly among the VFs and the PF, capped
3801                  * at VF-EQ-count. Any remainder queues belong to the PF.
3802                  */
3803                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3804                                 res.max_rss_qs / (num_vfs + 1));
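                /* Illustrative example (numbers not from the source): with
                 * 32 RSS queues in the PF pool and num_vfs = 7, each function
                 * is offered min(SH_VF_MAX_NIC_EQS, 32 / 8) queues.
                 */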
3805
3806                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3807                  * RSS Tables per port. Provide RSS on VFs, only if number of
3808                  * VFs requested is less than its PF Pool's RSS Tables limit.
3809                  */
3810                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3811                         num_vf_qs = 1;
3812         }
3813
3814         /* Fields set to all '1's by the GET_PROFILE_CONFIG cmd are the ones
3815          * that are modifiable using the SET_PROFILE_CONFIG cmd.
3816          */
3817         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3818                                   RESOURCE_MODIFIABLE, 0);
3819
3820         /* If RSS IFACE capability flags are modifiable for a VF, set the
3821          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3822          * more than 1 RSSQ is available for a VF.
3823          * Otherwise, provision only 1 queue pair for VF.
3824          */
3825         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3826                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3827                 if (num_vf_qs > 1) {
3828                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3829                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3830                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3831                 } else {
3832                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3833                                              BE_IF_FLAGS_DEFQ_RSS);
3834                 }
3835         } else {
3836                 num_vf_qs = 1;
3837         }
3838
3839         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3840                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3841                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3842         }
3843
3844         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3845         vft_res->max_rx_qs = num_vf_qs;
3846         vft_res->max_rss_qs = num_vf_qs;
3847         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3848         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3849
3850         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3851          * among the PF and its VFs, if the fields are changeable
3852          */
3853         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3854                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3855
3856         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3857                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3858
3859         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3860                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3861
3862         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3863                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
3864 }
3865
3866 static int be_clear(struct be_adapter *adapter)
3867 {
3868         struct pci_dev *pdev = adapter->pdev;
3869         struct  be_resources vft_res = {0};
3870
3871         be_cancel_worker(adapter);
3872
3873         if (sriov_enabled(adapter))
3874                 be_vf_clear(adapter);
3875
3876         /* Re-configure FW to distribute resources evenly across max-supported
3877          * number of VFs, only when VFs are not already enabled.
3878          */
3879         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
3880             !pci_vfs_assigned(pdev)) {
3881                 be_calculate_vf_res(adapter,
3882                                     pci_sriov_get_totalvfs(pdev),
3883                                     &vft_res);
3884                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
3885                                         pci_sriov_get_totalvfs(pdev),
3886                                         &vft_res);
3887         }
3888
3889         be_disable_vxlan_offloads(adapter);
3890         kfree(adapter->pmac_id);
3891         adapter->pmac_id = NULL;
3892
3893         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
3894
3895         be_clear_queues(adapter);
3896
3897         be_msix_disable(adapter);
3898         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
3899         return 0;
3900 }
3901
3902 static int be_vfs_if_create(struct be_adapter *adapter)
3903 {
3904         struct be_resources res = {0};
3905         u32 cap_flags, en_flags, vf;
3906         struct be_vf_cfg *vf_cfg;
3907         int status;
3908
3909         /* If a FW profile exists, then cap_flags are updated */
3910         cap_flags = BE_VF_IF_EN_FLAGS;
3911
3912         for_all_vfs(adapter, vf_cfg, vf) {
3913                 if (!BE3_chip(adapter)) {
3914                         status = be_cmd_get_profile_config(adapter, &res, NULL,
3915                                                            ACTIVE_PROFILE_TYPE,
3916                                                            RESOURCE_LIMITS,
3917                                                            vf + 1);
3918                         if (!status) {
3919                                 cap_flags = res.if_cap_flags;
3920                                 /* Prevent VFs from enabling VLAN promiscuous
3921                                  * mode
3922                                  */
3923                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3924                         }
3925                 }
3926
3927                 /* PF should enable IF flags during proxy if_create call */
3928                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
3929                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
3930                                           &vf_cfg->if_handle, vf + 1);
3931                 if (status)
3932                         return status;
3933         }
3934
3935         return 0;
3936 }
3937
3938 static int be_vf_setup_init(struct be_adapter *adapter)
3939 {
3940         struct be_vf_cfg *vf_cfg;
3941         int vf;
3942
3943         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
3944                                   GFP_KERNEL);
3945         if (!adapter->vf_cfg)
3946                 return -ENOMEM;
3947
3948         for_all_vfs(adapter, vf_cfg, vf) {
3949                 vf_cfg->if_handle = -1;
3950                 vf_cfg->pmac_id = -1;
3951         }
3952         return 0;
3953 }
3954
3955 static int be_vf_setup(struct be_adapter *adapter)
3956 {
3957         struct device *dev = &adapter->pdev->dev;
3958         struct be_vf_cfg *vf_cfg;
3959         int status, old_vfs, vf;
3960         bool spoofchk;
3961
3962         old_vfs = pci_num_vf(adapter->pdev);
3963
3964         status = be_vf_setup_init(adapter);
3965         if (status)
3966                 goto err;
3967
3968         if (old_vfs) {
3969                 for_all_vfs(adapter, vf_cfg, vf) {
3970                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
3971                         if (status)
3972                                 goto err;
3973                 }
3974
3975                 status = be_vfs_mac_query(adapter);
3976                 if (status)
3977                         goto err;
3978         } else {
3979                 status = be_vfs_if_create(adapter);
3980                 if (status)
3981                         goto err;
3982
3983                 status = be_vf_eth_addr_config(adapter);
3984                 if (status)
3985                         goto err;
3986         }
3987
3988         for_all_vfs(adapter, vf_cfg, vf) {
3989                 /* Allow VFs to program MAC/VLAN filters */
3990                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
3991                                                   vf + 1);
3992                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
3993                         status = be_cmd_set_fn_privileges(adapter,
3994                                                           vf_cfg->privileges |
3995                                                           BE_PRIV_FILTMGMT,
3996                                                           vf + 1);
3997                         if (!status) {
3998                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
3999                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4000                                          vf);
4001                         }
4002                 }
4003
4004                 /* Allow full available bandwidth */
4005                 if (!old_vfs)
4006                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4007
4008                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4009                                                vf_cfg->if_handle, NULL,
4010                                                &spoofchk);
4011                 if (!status)
4012                         vf_cfg->spoofchk = spoofchk;
4013
4014                 if (!old_vfs) {
4015                         be_cmd_enable_vf(adapter, vf + 1);
4016                         be_cmd_set_logical_link_config(adapter,
4017                                                        IFLA_VF_LINK_STATE_AUTO,
4018                                                        vf+1);
4019                 }
4020         }
4021
4022         if (!old_vfs) {
4023                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4024                 if (status) {
4025                         dev_err(dev, "SRIOV enable failed\n");
4026                         adapter->num_vfs = 0;
4027                         goto err;
4028                 }
4029         }
4030
4031         if (BE3_chip(adapter)) {
4032                 /* On BE3, enable VEB only when SRIOV is enabled */
4033                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4034                                                adapter->if_handle,
4035                                                PORT_FWD_TYPE_VEB, 0);
4036                 if (status)
4037                         goto err;
4038         }
4039
4040         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4041         return 0;
4042 err:
4043         dev_err(dev, "VF setup failed\n");
4044         be_vf_clear(adapter);
4045         return status;
4046 }
4047
4048 /* Converting function_mode bits on BE3 to SH mc_type enums */
4049
4050 static u8 be_convert_mc_type(u32 function_mode)
4051 {
4052         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4053                 return vNIC1;
4054         else if (function_mode & QNQ_MODE)
4055                 return FLEX10;
4056         else if (function_mode & VNIC_MODE)
4057                 return vNIC2;
4058         else if (function_mode & UMC_ENABLED)
4059                 return UMC;
4060         else
4061                 return MC_NONE;
4062 }
4063
4064 /* On BE2/BE3, FW does not report the supported resource limits */
4065 static void BEx_get_resources(struct be_adapter *adapter,
4066                               struct be_resources *res)
4067 {
4068         bool use_sriov = adapter->num_vfs ? 1 : 0;
4069
4070         if (be_physfn(adapter))
4071                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4072         else
4073                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4074
4075         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4076
4077         if (be_is_mc(adapter)) {
4078                 /* Assuming that there are 4 channels per port,
4079                  * when multi-channel is enabled
4080                  */
4081                 if (be_is_qnq_mode(adapter))
4082                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4083                 else
4084                         /* In a non-qnq multichannel mode, the pvid
4085                          * takes up one vlan entry
4086                          */
4087                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4088         } else {
4089                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4090         }
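        /* Illustrative example (assuming BE_NUM_VLANS_SUPPORTED were 64, a value
         * not stated here): QnQ multi-channel gets 64 / 8 = 8 VLAN filters,
         * other multi-channel modes get 64 / 4 - 1 = 15.
         */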
4091
4092         res->max_mcast_mac = BE_MAX_MC;
4093
4094         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4095          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4096          *    *only* if it is RSS-capable.
4097          */
4098         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4099             be_virtfn(adapter) ||
4100             (be_is_mc(adapter) &&
4101              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4102                 res->max_tx_qs = 1;
4103         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4104                 struct be_resources super_nic_res = {0};
4105
4106                 /* On a SuperNIC profile, the driver needs to use the
4107                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4108                  */
4109                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4110                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4111                                           0);
4112                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4113                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4114         } else {
4115                 res->max_tx_qs = BE3_MAX_TX_QS;
4116         }
4117
4118         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4119             !use_sriov && be_physfn(adapter))
4120                 res->max_rss_qs = (adapter->be3_native) ?
4121                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4122         res->max_rx_qs = res->max_rss_qs + 1;
4123
4124         if (be_physfn(adapter))
4125                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4126                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4127         else
4128                 res->max_evt_qs = 1;
4129
4130         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4131         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4132         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4133                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4134 }
4135
4136 static void be_setup_init(struct be_adapter *adapter)
4137 {
4138         adapter->vlan_prio_bmap = 0xff;
4139         adapter->phy.link_speed = -1;
4140         adapter->if_handle = -1;
4141         adapter->be3_native = false;
4142         adapter->if_flags = 0;
4143         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4144         if (be_physfn(adapter))
4145                 adapter->cmd_privileges = MAX_PRIVILEGES;
4146         else
4147                 adapter->cmd_privileges = MIN_PRIVILEGES;
4148 }
4149
4150 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4151  * However, this HW limitation is not exposed to the host via any SLI cmd.
4152  * As a result, in the case of SRIOV and in particular multi-partition configs
4153  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4154  * for distribution between the VFs. This self-imposed limit will determine the
4155  * number of VFs for which RSS can be enabled.
4156  */
4157 void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4158 {
4159         struct be_port_resources port_res = {0};
4160         u8 rss_tables_on_port;
4161         u16 max_vfs = be_max_vfs(adapter);
4162
4163         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4164                                   RESOURCE_LIMITS, 0);
4165
4166         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4167
4168         /* Each PF Pool's RSS Tables limit =
4169          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4170          */
4171         adapter->pool_res.max_rss_tables =
4172                 max_vfs * rss_tables_on_port / port_res.max_vfs;
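        /* Illustrative example (numbers not from the source): if the port has
         * 14 RSS tables left after reserving one per NIC PF, and this PF owns
         * 32 of the port's 64 VFs, the pool limit is 32 * 14 / 64 = 7 tables.
         */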
4173 }
4174
4175 static int be_get_sriov_config(struct be_adapter *adapter)
4176 {
4177         struct be_resources res = {0};
4178         int max_vfs, old_vfs;
4179
4180         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4181                                   RESOURCE_LIMITS, 0);
4182
4183         /* Some old versions of BE3 FW don't report max_vfs value */
4184         if (BE3_chip(adapter) && !res.max_vfs) {
4185                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4186                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4187         }
4188
4189         adapter->pool_res = res;
4190
4191         /* If, during a previous unload of the driver, the VFs were not disabled,
4192          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4193          * Instead use the TotalVFs value stored in the pci-dev struct.
4194          */
4195         old_vfs = pci_num_vf(adapter->pdev);
4196         if (old_vfs) {
4197                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4198                          old_vfs);
4199
4200                 adapter->pool_res.max_vfs =
4201                         pci_sriov_get_totalvfs(adapter->pdev);
4202                 adapter->num_vfs = old_vfs;
4203         }
4204
4205         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4206                 be_calculate_pf_pool_rss_tables(adapter);
4207                 dev_info(&adapter->pdev->dev,
4208                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4209                          be_max_pf_pool_rss_tables(adapter));
4210         }
4211         return 0;
4212 }
4213
4214 static void be_alloc_sriov_res(struct be_adapter *adapter)
4215 {
4216         int old_vfs = pci_num_vf(adapter->pdev);
4217         struct  be_resources vft_res = {0};
4218         int status;
4219
4220         be_get_sriov_config(adapter);
4221
4222         if (!old_vfs)
4223                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4224
4225         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4226          * resources are given to PF during driver load, if there are no
4227          * old VFs. This facility is not available in BE3 FW.
4228          * Also, this is done by FW in Lancer chip.
4229          */
4230         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4231                 be_calculate_vf_res(adapter, 0, &vft_res);
4232                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4233                                                  &vft_res);
4234                 if (status)
4235                         dev_err(&adapter->pdev->dev,
4236                                 "Failed to optimize SRIOV resources\n");
4237         }
4238 }
4239
4240 static int be_get_resources(struct be_adapter *adapter)
4241 {
4242         struct device *dev = &adapter->pdev->dev;
4243         struct be_resources res = {0};
4244         int status;
4245
4246         /* For Lancer, SH etc., read per-function resource limits from FW.
4247          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4248          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4249          */
4250         if (BEx_chip(adapter)) {
4251                 BEx_get_resources(adapter, &res);
4252         } else {
4253                 status = be_cmd_get_func_config(adapter, &res);
4254                 if (status)
4255                         return status;
4256
4257                 /* If a default RXQ must be created, we'll use up one RSSQ */
4258                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4259                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4260                         res.max_rss_qs -= 1;
4261         }
4262
4263         /* If RoCE is supported, stash away half the EQs for RoCE */
4264         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4265                                 res.max_evt_qs / 2 : res.max_evt_qs;
4266         adapter->res = res;
4267
4268         /* If FW supports RSS default queue, then skip creating non-RSS
4269          * queue for non-IP traffic.
4270          */
4271         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4272                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4273
4274         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4275                  be_max_txqs(adapter), be_max_rxqs(adapter),
4276                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4277                  be_max_vfs(adapter));
4278         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4279                  be_max_uc(adapter), be_max_mc(adapter),
4280                  be_max_vlans(adapter));
4281
4282         /* Ensure RX and TX queues are created in pairs at init time */
4283         adapter->cfg_num_rx_irqs =
4284                                 min_t(u16, netif_get_num_default_rss_queues(),
4285                                       be_max_qp_irqs(adapter));
4286         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4287         return 0;
4288 }
4289
4290 static int be_get_config(struct be_adapter *adapter)
4291 {
4292         int status, level;
4293         u16 profile_id;
4294
4295         status = be_cmd_get_cntl_attributes(adapter);
4296         if (status)
4297                 return status;
4298
4299         status = be_cmd_query_fw_cfg(adapter);
4300         if (status)
4301                 return status;
4302
4303         if (!lancer_chip(adapter) && be_physfn(adapter))
4304                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4305
4306         if (BEx_chip(adapter)) {
4307                 level = be_cmd_get_fw_log_level(adapter);
4308                 adapter->msg_enable =
4309                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4310         }
4311
4312         be_cmd_get_acpi_wol_cap(adapter);
4313         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4314         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4315
4316         be_cmd_query_port_name(adapter);
4317
4318         if (be_physfn(adapter)) {
4319                 status = be_cmd_get_active_profile(adapter, &profile_id);
4320                 if (!status)
4321                         dev_info(&adapter->pdev->dev,
4322                                  "Using profile 0x%x\n", profile_id);
4323         }
4324
4325         return 0;
4326 }
4327
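/* If the netdev has no MAC address yet, read the permanent MAC from FW and
 * use it as both the current and the permanent address.
 */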
4328 static int be_mac_setup(struct be_adapter *adapter)
4329 {
4330         u8 mac[ETH_ALEN];
4331         int status;
4332
4333         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4334                 status = be_cmd_get_perm_mac(adapter, mac);
4335                 if (status)
4336                         return status;
4337
4338                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4339                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4340         }
4341
4342         return 0;
4343 }
4344
4345 static void be_schedule_worker(struct be_adapter *adapter)
4346 {
4347         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
4348         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4349 }
4350
4351 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4352 {
4353         schedule_delayed_work(&adapter->be_err_detection_work,
4354                               msecs_to_jiffies(delay));
4355         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4356 }
4357
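/* Create the event queues, TX queues, RX CQs and MCC queues, then publish
 * the real RX/TX queue counts to the stack. Callers hold rtnl_lock() since
 * netif_set_real_num_rx/tx_queues() require it.
 */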
4358 static int be_setup_queues(struct be_adapter *adapter)
4359 {
4360         struct net_device *netdev = adapter->netdev;
4361         int status;
4362
4363         status = be_evt_queues_create(adapter);
4364         if (status)
4365                 goto err;
4366
4367         status = be_tx_qs_create(adapter);
4368         if (status)
4369                 goto err;
4370
4371         status = be_rx_cqs_create(adapter);
4372         if (status)
4373                 goto err;
4374
4375         status = be_mcc_queues_create(adapter);
4376         if (status)
4377                 goto err;
4378
4379         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4380         if (status)
4381                 goto err;
4382
4383         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4384         if (status)
4385                 goto err;
4386
4387         return 0;
4388 err:
4389         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4390         return status;
4391 }
4392
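/* Create the interface object. RSS capability is requested only when more
 * than one RX IRQ is configured; the RX filter flags themselves are enabled
 * later in be_open().
 */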
4393 static int be_if_create(struct be_adapter *adapter)
4394 {
4395         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4396         u32 cap_flags = be_if_cap_flags(adapter);
4397         int status;
4398
4399         if (adapter->cfg_num_rx_irqs == 1)
4400                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4401
4402         en_flags &= cap_flags;
4403         /* will enable all the needed filter flags in be_open() */
4404         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4405                                   &adapter->if_handle, 0);
4406
4407         return status;
4408 }
4409
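/* Destroy and recreate the interface object and all queues, re-enabling
 * MSI-X if needed. Used when the queue/resource configuration changes,
 * e.g. when VFs are enabled or disabled via sriov_configure.
 */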
4410 int be_update_queues(struct be_adapter *adapter)
4411 {
4412         struct net_device *netdev = adapter->netdev;
4413         int status;
4414
4415         if (netif_running(netdev))
4416                 be_close(netdev);
4417
4418         be_cancel_worker(adapter);
4419
4420         /* If any vectors have been shared with RoCE we cannot re-program
4421          * the MSIx table.
4422          */
4423         if (!adapter->num_msix_roce_vec)
4424                 be_msix_disable(adapter);
4425
4426         be_clear_queues(adapter);
4427         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4428         if (status)
4429                 return status;
4430
4431         if (!msix_enabled(adapter)) {
4432                 status = be_msix_enable(adapter);
4433                 if (status)
4434                         return status;
4435         }
4436
4437         status = be_if_create(adapter);
4438         if (status)
4439                 return status;
4440
4441         status = be_setup_queues(adapter);
4442         if (status)
4443                 return status;
4444
4445         be_schedule_worker(adapter);
4446
4447         if (netif_running(netdev))
4448                 status = be_open(netdev);
4449
4450         return status;
4451 }
4452
4453 static inline int fw_major_num(const char *fw_ver)
4454 {
4455         int fw_major = 0, i;
4456
4457         i = sscanf(fw_ver, "%d.", &fw_major);
4458         if (i != 1)
4459                 return 0;
4460
4461         return fw_major;
4462 }
4463
4464 /* If any VFs are already enabled, don't FLR the PF */
4465 static bool be_reset_required(struct be_adapter *adapter)
4466 {
4467         return pci_num_vf(adapter->pdev) ? false : true;
4468 }
4469
4470 /* Wait for the FW to be ready and perform the required initialization */
4471 static int be_func_init(struct be_adapter *adapter)
4472 {
4473         int status;
4474
4475         status = be_fw_wait_ready(adapter);
4476         if (status)
4477                 return status;
4478
4479         if (be_reset_required(adapter)) {
4480                 status = be_cmd_reset_function(adapter);
4481                 if (status)
4482                         return status;
4483
4484                 /* Wait for interrupts to quiesce after an FLR */
4485                 msleep(100);
4486
4487                 /* We can clear all errors when function reset succeeds */
4488                 be_clear_error(adapter, BE_CLEAR_ALL);
4489         }
4490
4491         /* Tell FW we're ready to fire cmds */
4492         status = be_cmd_fw_init(adapter);
4493         if (status)
4494                 return status;
4495
4496         /* Allow interrupts for other ULPs running on NIC function */
4497         be_intr_set(adapter, true);
4498
4499         return 0;
4500 }
4501
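/* Top-level setup path: wait for FW readiness, query the configuration and
 * resource limits, create the interface and queues, program MAC and
 * flow-control settings and, if requested, set up the VFs.
 */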
4502 static int be_setup(struct be_adapter *adapter)
4503 {
4504         struct device *dev = &adapter->pdev->dev;
4505         int status;
4506
4507         status = be_func_init(adapter);
4508         if (status)
4509                 return status;
4510
4511         be_setup_init(adapter);
4512
4513         if (!lancer_chip(adapter))
4514                 be_cmd_req_native_mode(adapter);
4515
4516         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4517          * for issuing profile-related cmds.
4518          */
4519         if (!BEx_chip(adapter)) {
4520                 status = be_cmd_get_func_config(adapter, NULL);
4521                 if (status)
4522                         return status;
4523         }
4524
4525         status = be_get_config(adapter);
4526         if (status)
4527                 goto err;
4528
4529         if (!BE2_chip(adapter) && be_physfn(adapter))
4530                 be_alloc_sriov_res(adapter);
4531
4532         status = be_get_resources(adapter);
4533         if (status)
4534                 goto err;
4535
4536         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4537                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4538         if (!adapter->pmac_id)
4539                 return -ENOMEM;
4540
4541         status = be_msix_enable(adapter);
4542         if (status)
4543                 goto err;
4544
4545         /* will enable all the needed filter flags in be_open() */
4546         status = be_if_create(adapter);
4547         if (status)
4548                 goto err;
4549
4550         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4551         rtnl_lock();
4552         status = be_setup_queues(adapter);
4553         rtnl_unlock();
4554         if (status)
4555                 goto err;
4556
4557         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4558
4559         status = be_mac_setup(adapter);
4560         if (status)
4561                 goto err;
4562
4563         be_cmd_get_fw_ver(adapter);
4564         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4565
4566         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4567                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4568                         adapter->fw_ver);
4569                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4570         }
4571
4572         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4573                                          adapter->rx_fc);
4574         if (status)
4575                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4576                                         &adapter->rx_fc);
4577
4578         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4579                  adapter->tx_fc, adapter->rx_fc);
4580
4581         if (be_physfn(adapter))
4582                 be_cmd_set_logical_link_config(adapter,
4583                                                IFLA_VF_LINK_STATE_AUTO, 0);
4584
4585         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4586          * confusing a Linux bridge or OVS that it might be connected to.
4587          * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4588          * when SRIOV is not enabled.
4589          */
4590         if (BE3_chip(adapter))
4591                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4592                                       PORT_FWD_TYPE_PASSTHRU, 0);
4593
4594         if (adapter->num_vfs)
4595                 be_vf_setup(adapter);
4596
4597         status = be_cmd_get_phy_info(adapter);
4598         if (!status && be_pause_supported(adapter))
4599                 adapter->phy.fc_autoneg = 1;
4600
4601         be_schedule_worker(adapter);
4602         adapter->flags |= BE_FLAGS_SETUP_DONE;
4603         return 0;
4604 err:
4605         be_clear(adapter);
4606         return status;
4607 }
4608
4609 #ifdef CONFIG_NET_POLL_CONTROLLER
4610 static void be_netpoll(struct net_device *netdev)
4611 {
4612         struct be_adapter *adapter = netdev_priv(netdev);
4613         struct be_eq_obj *eqo;
4614         int i;
4615
4616         for_all_evt_queues(adapter, eqo, i) {
4617                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4618                 napi_schedule(&eqo->napi);
4619         }
4620 }
4621 #endif
4622
4623 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4624 {
4625         const struct firmware *fw;
4626         int status;
4627
4628         if (!netif_running(adapter->netdev)) {
4629                 dev_err(&adapter->pdev->dev,
4630                         "Firmware load not allowed (interface is down)\n");
4631                 return -ENETDOWN;
4632         }
4633
4634         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4635         if (status)
4636                 goto fw_exit;
4637
4638         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4639
4640         if (lancer_chip(adapter))
4641                 status = lancer_fw_download(adapter, fw);
4642         else
4643                 status = be_fw_download(adapter, fw);
4644
4645         if (!status)
4646                 be_cmd_get_fw_ver(adapter);
4647
4648 fw_exit:
4649         release_firmware(fw);
4650         return status;
4651 }
4652
4653 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4654                                  u16 flags)
4655 {
4656         struct be_adapter *adapter = netdev_priv(dev);
4657         struct nlattr *attr, *br_spec;
4658         int rem;
4659         int status = 0;
4660         u16 mode = 0;
4661
4662         if (!sriov_enabled(adapter))
4663                 return -EOPNOTSUPP;
4664
4665         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4666         if (!br_spec)
4667                 return -EINVAL;
4668
4669         nla_for_each_nested(attr, br_spec, rem) {
4670                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4671                         continue;
4672
4673                 if (nla_len(attr) < sizeof(mode))
4674                         return -EINVAL;
4675
4676                 mode = nla_get_u16(attr);
4677                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4678                         return -EOPNOTSUPP;
4679
4680                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4681                         return -EINVAL;
4682
4683                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4684                                                adapter->if_handle,
4685                                                mode == BRIDGE_MODE_VEPA ?
4686                                                PORT_FWD_TYPE_VEPA :
4687                                                PORT_FWD_TYPE_VEB, 0);
4688                 if (status)
4689                         goto err;
4690
4691                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4692                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4693
4694                 return status;
4695         }
4696 err:
4697         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4698                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4699
4700         return status;
4701 }
4702
4703 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4704                                  struct net_device *dev, u32 filter_mask,
4705                                  int nlflags)
4706 {
4707         struct be_adapter *adapter = netdev_priv(dev);
4708         int status = 0;
4709         u8 hsw_mode;
4710
4711         /* BE and Lancer chips support VEB mode only */
4712         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4713                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4714                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4715                         return 0;
4716                 hsw_mode = PORT_FWD_TYPE_VEB;
4717         } else {
4718                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4719                                                adapter->if_handle, &hsw_mode,
4720                                                NULL);
4721                 if (status)
4722                         return 0;
4723
4724                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4725                         return 0;
4726         }
4727
4728         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4729                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4730                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4731                                        0, 0, nlflags, filter_mask, NULL);
4732 }
4733
4734 /* VxLAN offload Notes:
4735  *
4736  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4737  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4738  * is expected to work across all types of IP tunnels once exported. Skyhawk
4739  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4740  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4741  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4742  * those other tunnels are unexported on the fly through ndo_features_check().
4743  *
4744  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4745  * adds more than one port, disable offloads and don't re-enable them
4746  * until after all the tunnels are removed.
4747  */
4748 static void be_add_vxlan_port(struct net_device *netdev,
4749                               struct udp_tunnel_info *ti)
4750 {
4751         struct be_adapter *adapter = netdev_priv(netdev);
4752         struct device *dev = &adapter->pdev->dev;
4753         __be16 port = ti->port;
4754         int status;
4755
4756         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4757                 return;
4758
4759         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4760                 return;
4761
4762         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4763                 adapter->vxlan_port_aliases++;
4764                 return;
4765         }
4766
4767         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4768                 dev_info(dev,
4769                          "Only one UDP port supported for VxLAN offloads\n");
4770                 dev_info(dev, "Disabling VxLAN offloads\n");
4771                 adapter->vxlan_port_count++;
4772                 goto err;
4773         }
4774
4775         if (adapter->vxlan_port_count++ >= 1)
4776                 return;
4777
4778         status = be_cmd_manage_iface(adapter, adapter->if_handle,
4779                                      OP_CONVERT_NORMAL_TO_TUNNEL);
4780         if (status) {
4781                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4782                 goto err;
4783         }
4784
4785         status = be_cmd_set_vxlan_port(adapter, port);
4786         if (status) {
4787                 dev_warn(dev, "Failed to add VxLAN port\n");
4788                 goto err;
4789         }
4790         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4791         adapter->vxlan_port = port;
4792
4793         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4794                                    NETIF_F_TSO | NETIF_F_TSO6 |
4795                                    NETIF_F_GSO_UDP_TUNNEL;
4796         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4797         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4798
4799         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4800                  be16_to_cpu(port));
4801         return;
4802 err:
4803         be_disable_vxlan_offloads(adapter);
4804 }
4805
4806 static void be_del_vxlan_port(struct net_device *netdev,
4807                               struct udp_tunnel_info *ti)
4808 {
4809         struct be_adapter *adapter = netdev_priv(netdev);
4810         __be16 port = ti->port;
4811
4812         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4813                 return;
4814
4815         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4816                 return;
4817
4818         if (adapter->vxlan_port != port)
4819                 goto done;
4820
4821         if (adapter->vxlan_port_aliases) {
4822                 adapter->vxlan_port_aliases--;
4823                 return;
4824         }
4825
4826         be_disable_vxlan_offloads(adapter);
4827
4828         dev_info(&adapter->pdev->dev,
4829                  "Disabled VxLAN offloads for UDP port %d\n",
4830                  be16_to_cpu(port));
4831 done:
4832         adapter->vxlan_port_count--;
4833 }
4834
4835 static netdev_features_t be_features_check(struct sk_buff *skb,
4836                                            struct net_device *dev,
4837                                            netdev_features_t features)
4838 {
4839         struct be_adapter *adapter = netdev_priv(dev);
4840         u8 l4_hdr = 0;
4841
4842         /* The code below restricts offload features for some tunneled packets.
4843          * Offload features for normal (non-tunnel) packets are unchanged.
4844          */
4845         if (!skb->encapsulation ||
4846             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
4847                 return features;
4848
4849         /* It's an encapsulated packet and VxLAN offloads are enabled. We
4850          * should disable tunnel offload features if it's not a VxLAN packet,
4851          * as tunnel offloads have been enabled only for VxLAN. This is done to
4852          * allow other tunneled traffic like GRE to work fine while VxLAN
4853          * offloads are configured in Skyhawk-R.
4854          */
4855         switch (vlan_get_protocol(skb)) {
4856         case htons(ETH_P_IP):
4857                 l4_hdr = ip_hdr(skb)->protocol;
4858                 break;
4859         case htons(ETH_P_IPV6):
4860                 l4_hdr = ipv6_hdr(skb)->nexthdr;
4861                 break;
4862         default:
4863                 return features;
4864         }
4865
4866         if (l4_hdr != IPPROTO_UDP ||
4867             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
4868             skb->inner_protocol != htons(ETH_P_TEB) ||
4869             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
4870             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
4871                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4872
4873         return features;
4874 }
4875
4876 static int be_get_phys_port_id(struct net_device *dev,
4877                                struct netdev_phys_item_id *ppid)
4878 {
4879         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
4880         struct be_adapter *adapter = netdev_priv(dev);
4881         u8 *id;
4882
4883         if (MAX_PHYS_ITEM_ID_LEN < id_len)
4884                 return -ENOSPC;
4885
4886         ppid->id[0] = adapter->hba_port_num + 1;
4887         id = &ppid->id[1];
4888         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
4889              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
4890                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
4891
4892         ppid->id_len = id_len;
4893
4894         return 0;
4895 }
4896
4897 static const struct net_device_ops be_netdev_ops = {
4898         .ndo_open               = be_open,
4899         .ndo_stop               = be_close,
4900         .ndo_start_xmit         = be_xmit,
4901         .ndo_set_rx_mode        = be_set_rx_mode,
4902         .ndo_set_mac_address    = be_mac_addr_set,
4903         .ndo_change_mtu         = be_change_mtu,
4904         .ndo_get_stats64        = be_get_stats64,
4905         .ndo_validate_addr      = eth_validate_addr,
4906         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
4907         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
4908         .ndo_set_vf_mac         = be_set_vf_mac,
4909         .ndo_set_vf_vlan        = be_set_vf_vlan,
4910         .ndo_set_vf_rate        = be_set_vf_tx_rate,
4911         .ndo_get_vf_config      = be_get_vf_config,
4912         .ndo_set_vf_link_state  = be_set_vf_link_state,
4913         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
4914 #ifdef CONFIG_NET_POLL_CONTROLLER
4915         .ndo_poll_controller    = be_netpoll,
4916 #endif
4917         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
4918         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
4919 #ifdef CONFIG_NET_RX_BUSY_POLL
4920         .ndo_busy_poll          = be_busy_poll,
4921 #endif
4922         .ndo_udp_tunnel_add     = be_add_vxlan_port,
4923         .ndo_udp_tunnel_del     = be_del_vxlan_port,
4924         .ndo_features_check     = be_features_check,
4925         .ndo_get_phys_port_id   = be_get_phys_port_id,
4926 };
4927
4928 static void be_netdev_init(struct net_device *netdev)
4929 {
4930         struct be_adapter *adapter = netdev_priv(netdev);
4931
4932         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4933                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
4934                 NETIF_F_HW_VLAN_CTAG_TX;
4935         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
4936                 netdev->hw_features |= NETIF_F_RXHASH;
4937
4938         netdev->features |= netdev->hw_features |
4939                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
4940
4941         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4942                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4943
4944         netdev->priv_flags |= IFF_UNICAST_FLT;
4945
4946         netdev->flags |= IFF_MULTICAST;
4947
4948         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
4949
4950         netdev->netdev_ops = &be_netdev_ops;
4951
4952         netdev->ethtool_ops = &be_ethtool_ops;
4953 }
4954
4955 static void be_cleanup(struct be_adapter *adapter)
4956 {
4957         struct net_device *netdev = adapter->netdev;
4958
4959         rtnl_lock();
4960         netif_device_detach(netdev);
4961         if (netif_running(netdev))
4962                 be_close(netdev);
4963         rtnl_unlock();
4964
4965         be_clear(adapter);
4966 }
4967
4968 static int be_resume(struct be_adapter *adapter)
4969 {
4970         struct net_device *netdev = adapter->netdev;
4971         int status;
4972
4973         status = be_setup(adapter);
4974         if (status)
4975                 return status;
4976
4977         rtnl_lock();
4978         if (netif_running(netdev))
4979                 status = be_open(netdev);
4980         rtnl_unlock();
4981
4982         if (status)
4983                 return status;
4984
4985         netif_device_attach(netdev);
4986
4987         return 0;
4988 }
4989
4990 static int be_err_recover(struct be_adapter *adapter)
4991 {
4992         int status;
4993
4994         /* Error recovery is supported only on Lancer as of now */
4995         if (!lancer_chip(adapter))
4996                 return -EIO;
4997
4998         /* Wait for adapter to reach quiescent state before
4999          * destroying queues
5000          */
5001         status = be_fw_wait_ready(adapter);
5002         if (status)
5003                 goto err;
5004
5005         be_cleanup(adapter);
5006
5007         status = be_resume(adapter);
5008         if (status)
5009                 goto err;
5010
5011         return 0;
5012 err:
5013         return status;
5014 }
5015
5016 static void be_err_detection_task(struct work_struct *work)
5017 {
5018         struct be_adapter *adapter =
5019                                 container_of(work, struct be_adapter,
5020                                              be_err_detection_work.work);
5021         struct device *dev = &adapter->pdev->dev;
5022         int recovery_status;
5023         int delay = ERR_DETECTION_DELAY;
5024
5025         be_detect_error(adapter);
5026
5027         if (be_check_error(adapter, BE_ERROR_HW))
5028                 recovery_status = be_err_recover(adapter);
5029         else
5030                 goto reschedule_task;
5031
5032         if (!recovery_status) {
5033                 adapter->recovery_retries = 0;
5034                 dev_info(dev, "Adapter recovery successful\n");
5035                 goto reschedule_task;
5036         } else if (be_virtfn(adapter)) {
5037                 /* For VFs, check every second whether the PF has
5038                  * allocated resources.
5039                  */
5040                 dev_err(dev, "Re-trying adapter recovery\n");
5041                 goto reschedule_task;
5042         } else if (adapter->recovery_retries++ <
5043                    MAX_ERR_RECOVERY_RETRY_COUNT) {
5044                 /* In case of another error during recovery, it takes 30 sec
5045                  * for adapter to come out of error. Retry error recovery after
5046                  * this time interval.
5047                  */
5048                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5049                 delay = ERR_RECOVERY_RETRY_DELAY;
5050                 goto reschedule_task;
5051         } else {
5052                 dev_err(dev, "Adapter recovery failed\n");
5053         }
5054
5055         return;
5056 reschedule_task:
5057         be_schedule_err_detection(adapter, delay);
5058 }
5059
5060 static void be_log_sfp_info(struct be_adapter *adapter)
5061 {
5062         int status;
5063
5064         status = be_cmd_query_sfp_info(adapter);
5065         if (!status) {
5066                 dev_err(&adapter->pdev->dev,
5067                         "Port %c: %s Vendor: %s part no: %s",
5068                         adapter->port_name,
5069                         be_misconfig_evt_port_state[adapter->phy_state],
5070                         adapter->phy.vendor_name,
5071                         adapter->phy.vendor_pn);
5072         }
5073         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5074 }
5075
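/* Periodic (1 second) housekeeping: query the die temperature, reap MCC
 * completions while the interface is down, request stats, replenish starved
 * RX queues and update EQ delays on pre-Skyhawk chips.
 */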
5076 static void be_worker(struct work_struct *work)
5077 {
5078         struct be_adapter *adapter =
5079                 container_of(work, struct be_adapter, work.work);
5080         struct be_rx_obj *rxo;
5081         int i;
5082
5083         if (be_physfn(adapter) &&
5084             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5085                 be_cmd_get_die_temperature(adapter);
5086
5087         /* when interrupts are not yet enabled, just reap any pending
5088          * mcc completions
5089          */
5090         if (!netif_running(adapter->netdev)) {
5091                 local_bh_disable();
5092                 be_process_mcc(adapter);
5093                 local_bh_enable();
5094                 goto reschedule;
5095         }
5096
5097         if (!adapter->stats_cmd_sent) {
5098                 if (lancer_chip(adapter))
5099                         lancer_cmd_get_pport_stats(adapter,
5100                                                    &adapter->stats_cmd);
5101                 else
5102                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5103         }
5104
5105         for_all_rx_queues(adapter, rxo, i) {
5106                 /* Replenish RX-queues starved due to memory
5107                  * allocation failures.
5108                  */
5109                 if (rxo->rx_post_starved)
5110                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5111         }
5112
5113         /* EQ-delay update for Skyhawk is done while notifying EQ */
5114         if (!skyhawk_chip(adapter))
5115                 be_eqd_update(adapter, false);
5116
5117         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5118                 be_log_sfp_info(adapter);
5119
5120 reschedule:
5121         adapter->work_counter++;
5122         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
5123 }
5124
5125 static void be_unmap_pci_bars(struct be_adapter *adapter)
5126 {
5127         if (adapter->csr)
5128                 pci_iounmap(adapter->pdev, adapter->csr);
5129         if (adapter->db)
5130                 pci_iounmap(adapter->pdev, adapter->db);
5131         if (adapter->pcicfg && adapter->pcicfg_mapped)
5132                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5133 }
5134
5135 static int db_bar(struct be_adapter *adapter)
5136 {
5137         if (lancer_chip(adapter) || be_virtfn(adapter))
5138                 return 0;
5139         else
5140                 return 4;
5141 }
5142
5143 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5144 {
5145         if (skyhawk_chip(adapter)) {
5146                 adapter->roce_db.size = 4096;
5147                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5148                                                               db_bar(adapter));
5149                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5150                                                                db_bar(adapter));
5151         }
5152         return 0;
5153 }
5154
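/* Map the PCI BARs needed by the chip family: the CSR BAR (BEx PF only),
 * the doorbell BAR and, on BEx/Skyhawk, the PCICFG region (memory-mapped
 * for the PF, an offset within the doorbell BAR for VFs).
 */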
5155 static int be_map_pci_bars(struct be_adapter *adapter)
5156 {
5157         struct pci_dev *pdev = adapter->pdev;
5158         u8 __iomem *addr;
5159         u32 sli_intf;
5160
5161         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5162         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5163                                 SLI_INTF_FAMILY_SHIFT;
5164         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5165
5166         if (BEx_chip(adapter) && be_physfn(adapter)) {
5167                 adapter->csr = pci_iomap(pdev, 2, 0);
5168                 if (!adapter->csr)
5169                         return -ENOMEM;
5170         }
5171
5172         addr = pci_iomap(pdev, db_bar(adapter), 0);
5173         if (!addr)
5174                 goto pci_map_err;
5175         adapter->db = addr;
5176
5177         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5178                 if (be_physfn(adapter)) {
5179                         /* PCICFG is the 2nd BAR in BE2 */
5180                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5181                         if (!addr)
5182                                 goto pci_map_err;
5183                         adapter->pcicfg = addr;
5184                         adapter->pcicfg_mapped = true;
5185                 } else {
5186                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5187                         adapter->pcicfg_mapped = false;
5188                 }
5189         }
5190
5191         be_roce_map_pci_bars(adapter);
5192         return 0;
5193
5194 pci_map_err:
5195         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5196         be_unmap_pci_bars(adapter);
5197         return -ENOMEM;
5198 }
5199
5200 static void be_drv_cleanup(struct be_adapter *adapter)
5201 {
5202         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5203         struct device *dev = &adapter->pdev->dev;
5204
5205         if (mem->va)
5206                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5207
5208         mem = &adapter->rx_filter;
5209         if (mem->va)
5210                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5211
5212         mem = &adapter->stats_cmd;
5213         if (mem->va)
5214                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5215 }
5216
5217 /* Allocate and initialize various fields in be_adapter struct */
5218 static int be_drv_init(struct be_adapter *adapter)
5219 {
5220         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5221         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5222         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5223         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5224         struct device *dev = &adapter->pdev->dev;
5225         int status = 0;
5226
5227         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5228         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5229                                                  &mbox_mem_alloc->dma,
5230                                                  GFP_KERNEL);
5231         if (!mbox_mem_alloc->va)
5232                 return -ENOMEM;
5233
5234         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5235         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5236         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5237
5238         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5239         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5240                                             &rx_filter->dma, GFP_KERNEL);
5241         if (!rx_filter->va) {
5242                 status = -ENOMEM;
5243                 goto free_mbox;
5244         }
5245
5246         if (lancer_chip(adapter))
5247                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5248         else if (BE2_chip(adapter))
5249                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5250         else if (BE3_chip(adapter))
5251                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5252         else
5253                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5254         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5255                                             &stats_cmd->dma, GFP_KERNEL);
5256         if (!stats_cmd->va) {
5257                 status = -ENOMEM;
5258                 goto free_rx_filter;
5259         }
5260
5261         mutex_init(&adapter->mbox_lock);
5262         spin_lock_init(&adapter->mcc_lock);
5263         spin_lock_init(&adapter->mcc_cq_lock);
5264         init_completion(&adapter->et_cmd_compl);
5265
5266         pci_save_state(adapter->pdev);
5267
5268         INIT_DELAYED_WORK(&adapter->work, be_worker);
5269         INIT_DELAYED_WORK(&adapter->be_err_detection_work,
5270                           be_err_detection_task);
5271
5272         adapter->rx_fc = true;
5273         adapter->tx_fc = true;
5274
5275         /* Must be a power of 2 or else MODULO will BUG_ON */
5276         adapter->be_get_temp_freq = 64;
5277
5278         return 0;
5279
5280 free_rx_filter:
5281         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5282 free_mbox:
5283         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5284                           mbox_mem_alloc->dma);
5285         return status;
5286 }
5287
5288 static void be_remove(struct pci_dev *pdev)
5289 {
5290         struct be_adapter *adapter = pci_get_drvdata(pdev);
5291
5292         if (!adapter)
5293                 return;
5294
5295         be_roce_dev_remove(adapter);
5296         be_intr_set(adapter, false);
5297
5298         be_cancel_err_detection(adapter);
5299
5300         unregister_netdev(adapter->netdev);
5301
5302         be_clear(adapter);
5303
5304         /* tell fw we're done with firing cmds */
5305         be_cmd_fw_clean(adapter);
5306
5307         be_unmap_pci_bars(adapter);
5308         be_drv_cleanup(adapter);
5309
5310         pci_disable_pcie_error_reporting(pdev);
5311
5312         pci_release_regions(pdev);
5313         pci_disable_device(pdev);
5314
5315         free_netdev(adapter->netdev);
5316 }
5317
5318 static ssize_t be_hwmon_show_temp(struct device *dev,
5319                                   struct device_attribute *dev_attr,
5320                                   char *buf)
5321 {
5322         struct be_adapter *adapter = dev_get_drvdata(dev);
5323
5324         /* Unit: millidegree Celsius */
5325         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5326                 return -EIO;
5327         else
5328                 return sprintf(buf, "%u\n",
5329                                adapter->hwmon_info.be_on_die_temp * 1000);
5330 }
5331
5332 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5333                           be_hwmon_show_temp, NULL, 1);
5334
5335 static struct attribute *be_hwmon_attrs[] = {
5336         &sensor_dev_attr_temp1_input.dev_attr.attr,
5337         NULL
5338 };
5339
5340 ATTRIBUTE_GROUPS(be_hwmon);
5341
5342 static char *mc_name(struct be_adapter *adapter)
5343 {
5344         char *str = ""; /* default */
5345
5346         switch (adapter->mc_type) {
5347         case UMC:
5348                 str = "UMC";
5349                 break;
5350         case FLEX10:
5351                 str = "FLEX10";
5352                 break;
5353         case vNIC1:
5354                 str = "vNIC-1";
5355                 break;
5356         case nPAR:
5357                 str = "nPAR";
5358                 break;
5359         case UFP:
5360                 str = "UFP";
5361                 break;
5362         case vNIC2:
5363                 str = "vNIC-2";
5364                 break;
5365         default:
5366                 str = "";
5367         }
5368
5369         return str;
5370 }
5371
5372 static inline char *func_name(struct be_adapter *adapter)
5373 {
5374         return be_physfn(adapter) ? "PF" : "VF";
5375 }
5376
5377 static inline char *nic_name(struct pci_dev *pdev)
5378 {
5379         switch (pdev->device) {
5380         case OC_DEVICE_ID1:
5381                 return OC_NAME;
5382         case OC_DEVICE_ID2:
5383                 return OC_NAME_BE;
5384         case OC_DEVICE_ID3:
5385         case OC_DEVICE_ID4:
5386                 return OC_NAME_LANCER;
5387         case BE_DEVICE_ID2:
5388                 return BE3_NAME;
5389         case OC_DEVICE_ID5:
5390         case OC_DEVICE_ID6:
5391                 return OC_NAME_SH;
5392         default:
5393                 return BE_NAME;
5394         }
5395 }
5396
5397 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5398 {
5399         struct be_adapter *adapter;
5400         struct net_device *netdev;
5401         int status = 0;
5402
5403         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5404
5405         status = pci_enable_device(pdev);
5406         if (status)
5407                 goto do_none;
5408
5409         status = pci_request_regions(pdev, DRV_NAME);
5410         if (status)
5411                 goto disable_dev;
5412         pci_set_master(pdev);
5413
5414         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5415         if (!netdev) {
5416                 status = -ENOMEM;
5417                 goto rel_reg;
5418         }
5419         adapter = netdev_priv(netdev);
5420         adapter->pdev = pdev;
5421         pci_set_drvdata(pdev, adapter);
5422         adapter->netdev = netdev;
5423         SET_NETDEV_DEV(netdev, &pdev->dev);
5424
5425         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5426         if (!status) {
5427                 netdev->features |= NETIF_F_HIGHDMA;
5428         } else {
5429                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5430                 if (status) {
5431                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5432                         goto free_netdev;
5433                 }
5434         }
5435
5436         status = pci_enable_pcie_error_reporting(pdev);
5437         if (!status)
5438                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5439
5440         status = be_map_pci_bars(adapter);
5441         if (status)
5442                 goto free_netdev;
5443
5444         status = be_drv_init(adapter);
5445         if (status)
5446                 goto unmap_bars;
5447
5448         status = be_setup(adapter);
5449         if (status)
5450                 goto drv_cleanup;
5451
5452         be_netdev_init(netdev);
5453         status = register_netdev(netdev);
5454         if (status != 0)
5455                 goto unsetup;
5456
5457         be_roce_dev_add(adapter);
5458
5459         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5460
5461         /* On-die temperature is not supported on VFs. */
5462         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5463                 adapter->hwmon_info.hwmon_dev =
5464                         devm_hwmon_device_register_with_groups(&pdev->dev,
5465                                                                DRV_NAME,
5466                                                                adapter,
5467                                                                be_hwmon_groups);
5468                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5469         }
5470
5471         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5472                  func_name(adapter), mc_name(adapter), adapter->port_name);
5473
5474         return 0;
5475
5476 unsetup:
5477         be_clear(adapter);
5478 drv_cleanup:
5479         be_drv_cleanup(adapter);
5480 unmap_bars:
5481         be_unmap_pci_bars(adapter);
5482 free_netdev:
5483         free_netdev(netdev);
5484 rel_reg:
5485         pci_release_regions(pdev);
5486 disable_dev:
5487         pci_disable_device(pdev);
5488 do_none:
5489         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5490         return status;
5491 }
5492
5493 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5494 {
5495         struct be_adapter *adapter = pci_get_drvdata(pdev);
5496
5497         be_intr_set(adapter, false);
5498         be_cancel_err_detection(adapter);
5499
5500         be_cleanup(adapter);
5501
5502         pci_save_state(pdev);
5503         pci_disable_device(pdev);
5504         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5505         return 0;
5506 }
5507
5508 static int be_pci_resume(struct pci_dev *pdev)
5509 {
5510         struct be_adapter *adapter = pci_get_drvdata(pdev);
5511         int status = 0;
5512
5513         status = pci_enable_device(pdev);
5514         if (status)
5515                 return status;
5516
5517         pci_restore_state(pdev);
5518
5519         status = be_resume(adapter);
5520         if (status)
5521                 return status;
5522
5523         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5524
5525         return 0;
5526 }
5527
5528 /*
5529  * An FLR will stop BE from DMAing any data.
5530  */
5531 static void be_shutdown(struct pci_dev *pdev)
5532 {
5533         struct be_adapter *adapter = pci_get_drvdata(pdev);
5534
5535         if (!adapter)
5536                 return;
5537
5538         be_roce_dev_shutdown(adapter);
5539         cancel_delayed_work_sync(&adapter->work);
5540         be_cancel_err_detection(adapter);
5541
5542         netif_device_detach(adapter->netdev);
5543
5544         be_cmd_reset_function(adapter);
5545
5546         pci_disable_device(pdev);
5547 }
5548
5549 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5550                                             pci_channel_state_t state)
5551 {
5552         struct be_adapter *adapter = pci_get_drvdata(pdev);
5553
5554         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5555
5556         be_roce_dev_remove(adapter);
5557
5558         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5559                 be_set_error(adapter, BE_ERROR_EEH);
5560
5561                 be_cancel_err_detection(adapter);
5562
5563                 be_cleanup(adapter);
5564         }
5565
5566         if (state == pci_channel_io_perm_failure)
5567                 return PCI_ERS_RESULT_DISCONNECT;
5568
5569         pci_disable_device(pdev);
5570
5571         /* The error could cause the FW to trigger a flash debug dump.
5572          * Resetting the card while the flash dump is in progress
5573          * can cause it not to recover; wait for it to finish.
5574          * Wait only for the first function, as it is needed only once
5575          * per adapter.
5576          */
5577         if (pdev->devfn == 0)
5578                 ssleep(30);
5579
5580         return PCI_ERS_RESULT_NEED_RESET;
5581 }
5582
5583 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5584 {
5585         struct be_adapter *adapter = pci_get_drvdata(pdev);
5586         int status;
5587
5588         dev_info(&adapter->pdev->dev, "EEH reset\n");
5589
5590         status = pci_enable_device(pdev);
5591         if (status)
5592                 return PCI_ERS_RESULT_DISCONNECT;
5593
5594         pci_set_master(pdev);
5595         pci_restore_state(pdev);
5596
5597         /* Check if card is ok and fw is ready */
5598         dev_info(&adapter->pdev->dev,
5599                  "Waiting for FW to be ready after EEH reset\n");
5600         status = be_fw_wait_ready(adapter);
5601         if (status)
5602                 return PCI_ERS_RESULT_DISCONNECT;
5603
5604         pci_cleanup_aer_uncorrect_error_status(pdev);
5605         be_clear_error(adapter, BE_CLEAR_ALL);
5606         return PCI_ERS_RESULT_RECOVERED;
5607 }
5608
5609 static void be_eeh_resume(struct pci_dev *pdev)
5610 {
5611         int status = 0;
5612         struct be_adapter *adapter = pci_get_drvdata(pdev);
5613
5614         dev_info(&adapter->pdev->dev, "EEH resume\n");
5615
5616         pci_save_state(pdev);
5617
5618         status = be_resume(adapter);
5619         if (status)
5620                 goto err;
5621
5622         be_roce_dev_add(adapter);
5623
5624         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5625         return;
5626 err:
5627         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
5628 }
5629
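/* sriov_configure handler: redistribute the PF-pool resources across the
 * requested number of VFs (on Skyhawk), refresh the resource limits,
 * recreate the queues and finally set up the VFs.
 */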
5630 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
5631 {
5632         struct be_adapter *adapter = pci_get_drvdata(pdev);
5633         struct be_resources vft_res = {0};
5634         int status;
5635
5636         if (!num_vfs)
5637                 be_vf_clear(adapter);
5638
5639         adapter->num_vfs = num_vfs;
5640
5641         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
5642                 dev_warn(&pdev->dev,
5643                          "Cannot disable VFs while they are assigned\n");
5644                 return -EBUSY;
5645         }
5646
5647         /* When the HW is in SRIOV capable configuration, the PF-pool resources
5648          * are equally distributed across the max-number of VFs. The user may
5649          * request only a subset of the max-vfs to be enabled.
5650          * Based on num_vfs, redistribute the resources across num_vfs so that
5651          * each VF will have access to a greater number of resources.
5652          * This facility is not available in BE3 FW.
5653          * Also, this is done by FW in Lancer chip.
5654          */
5655         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
5656                 be_calculate_vf_res(adapter, adapter->num_vfs,
5657                                     &vft_res);
5658                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
5659                                                  adapter->num_vfs, &vft_res);
5660                 if (status)
5661                         dev_err(&pdev->dev,
5662                                 "Failed to optimize SR-IOV resources\n");
5663         }
5664
5665         status = be_get_resources(adapter);
5666         if (status)
5667                 return be_cmd_status(status);
5668
5669         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
5670         rtnl_lock();
5671         status = be_update_queues(adapter);
5672         rtnl_unlock();
5673         if (status)
5674                 return be_cmd_status(status);
5675
5676         if (adapter->num_vfs)
5677                 status = be_vf_setup(adapter);
5678
5679         if (!status)
5680                 return adapter->num_vfs;
5681
5682         return 0;
5683 }
5684
5685 static const struct pci_error_handlers be_eeh_handlers = {
5686         .error_detected = be_eeh_err_detected,
5687         .slot_reset = be_eeh_reset,
5688         .resume = be_eeh_resume,
5689 };
5690
5691 static struct pci_driver be_driver = {
5692         .name = DRV_NAME,
5693         .id_table = be_dev_ids,
5694         .probe = be_probe,
5695         .remove = be_remove,
5696         .suspend = be_suspend,
5697         .resume = be_pci_resume,
5698         .shutdown = be_shutdown,
5699         .sriov_configure = be_pci_sriov_configure,
5700         .err_handler = &be_eeh_handlers
5701 };
5702
5703 static int __init be_init_module(void)
5704 {
5705         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
5706             rx_frag_size != 2048) {
5707                 printk(KERN_WARNING DRV_NAME
5708                         " : Module param rx_frag_size must be 2048/4096/8192."
5709                         " Using 2048\n");
5710                 rx_frag_size = 2048;
5711         }
5712
5713         if (num_vfs > 0) {
5714                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
5715                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
5716         }
5717
5718         return pci_register_driver(&be_driver);
5719 }
5720 module_init(be_init_module);
5721
5722 static void __exit be_exit_module(void)
5723 {
5724         pci_unregister_driver(&be_driver);
5725 }
5726 module_exit(be_exit_module);