1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 static const struct pci_device_id be_dev_ids[] = {
45         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
49         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
50         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
51         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
52         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
53         { 0 }
54 };
55 MODULE_DEVICE_TABLE(pci, be_dev_ids);
56
57 /* Workqueue used by all functions for deferring cmd calls to the adapter */
58 struct workqueue_struct *be_wq;
59
60 /* UE Status Low CSR */
61 static const char * const ue_status_low_desc[] = {
62         "CEV",
63         "CTX",
64         "DBUF",
65         "ERX",
66         "Host",
67         "MPU",
68         "NDMA",
69         "PTC ",
70         "RDMA ",
71         "RXF ",
72         "RXIPS ",
73         "RXULP0 ",
74         "RXULP1 ",
75         "RXULP2 ",
76         "TIM ",
77         "TPOST ",
78         "TPRE ",
79         "TXIPS ",
80         "TXULP0 ",
81         "TXULP1 ",
82         "UC ",
83         "WDMA ",
84         "TXULP2 ",
85         "HOST1 ",
86         "P0_OB_LINK ",
87         "P1_OB_LINK ",
88         "HOST_GPIO ",
89         "MBOX ",
90         "ERX2 ",
91         "SPARE ",
92         "JTAG ",
93         "MPU_INTPEND "
94 };
95
96 /* UE Status High CSR */
97 static const char * const ue_status_hi_desc[] = {
98         "LPCMEMHOST",
99         "MGMT_MAC",
100         "PCS0ONLINE",
101         "MPU_IRAM",
102         "PCS1ONLINE",
103         "PCTL0",
104         "PCTL1",
105         "PMEM",
106         "RR",
107         "TXPB",
108         "RXPP",
109         "XAUI",
110         "TXP",
111         "ARM",
112         "IPC",
113         "HOST2",
114         "HOST3",
115         "HOST4",
116         "HOST5",
117         "HOST6",
118         "HOST7",
119         "ECRC",
120         "Poison TLP",
121         "NETC",
122         "PERIPH",
123         "LLTXULP",
124         "D2P",
125         "RCON",
126         "LDMA",
127         "LLTXP",
128         "LLTXPB",
129         "Unknown"
130 };
131
132 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
133                                  BE_IF_FLAGS_BROADCAST | \
134                                  BE_IF_FLAGS_MULTICAST | \
135                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
136
137 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
138 {
139         struct be_dma_mem *mem = &q->dma_mem;
140
141         if (mem->va) {
142                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
143                                   mem->dma);
144                 mem->va = NULL;
145         }
146 }
147
148 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
149                           u16 len, u16 entry_size)
150 {
151         struct be_dma_mem *mem = &q->dma_mem;
152
153         memset(q, 0, sizeof(*q));
154         q->len = len;
155         q->entry_size = entry_size;
156         mem->size = len * entry_size;
157         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
158                                       GFP_KERNEL);
159         if (!mem->va)
160                 return -ENOMEM;
161         return 0;
162 }
163
164 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
165 {
166         u32 reg, enabled;
167
168         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
169                               &reg);
170         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
171
172         if (!enabled && enable)
173                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
174         else if (enabled && !enable)
175                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176         else
177                 return;
178
179         pci_write_config_dword(adapter->pdev,
180                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
181 }
182
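/* Enable/disable adapter interrupts: try the FW cmd first and fall back to
 * toggling the HOSTINTR bit in PCI config space if the cmd fails.
 */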
183 static void be_intr_set(struct be_adapter *adapter, bool enable)
184 {
185         int status = 0;
186
187         /* On Lancer, interrupts can't be controlled via this register */
188         if (lancer_chip(adapter))
189                 return;
190
191         if (be_check_error(adapter, BE_ERROR_EEH))
192                 return;
193
194         status = be_cmd_intr_set(adapter, enable);
195         if (status)
196                 be_reg_intr_set(adapter, enable);
197 }
198
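/* Ring the RX queue doorbell to notify the adapter of newly posted rx buffers */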
199 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
200 {
201         u32 val = 0;
202
203         if (be_check_error(adapter, BE_ERROR_HW))
204                 return;
205
206         val |= qid & DB_RQ_RING_ID_MASK;
207         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
208
209         wmb();
210         iowrite32(val, adapter->db + DB_RQ_OFFSET);
211 }
212
213 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
214                           u16 posted)
215 {
216         u32 val = 0;
217
218         if (be_check_error(adapter, BE_ERROR_HW))
219                 return;
220
221         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
222         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
223
224         wmb();
225         iowrite32(val, adapter->db + txo->db_offset);
226 }
227
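/* Ring the EQ doorbell: ack num_popped events, optionally re-arm the EQ and
 * clear the interrupt, and program the R2I delay encoding.
 */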
228 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
229                          bool arm, bool clear_int, u16 num_popped,
230                          u32 eq_delay_mult_enc)
231 {
232         u32 val = 0;
233
234         val |= qid & DB_EQ_RING_ID_MASK;
235         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
236
237         if (be_check_error(adapter, BE_ERROR_HW))
238                 return;
239
240         if (arm)
241                 val |= 1 << DB_EQ_REARM_SHIFT;
242         if (clear_int)
243                 val |= 1 << DB_EQ_CLR_SHIFT;
244         val |= 1 << DB_EQ_EVNT_SHIFT;
245         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
246         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
247         iowrite32(val, adapter->db + DB_EQ_OFFSET);
248 }
249
250 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
251 {
252         u32 val = 0;
253
254         val |= qid & DB_CQ_RING_ID_MASK;
255         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
256                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
257
258         if (be_check_error(adapter, BE_ERROR_HW))
259                 return;
260
261         if (arm)
262                 val |= 1 << DB_CQ_REARM_SHIFT;
263         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
264         iowrite32(val, adapter->db + DB_CQ_OFFSET);
265 }
266
267 static int be_mac_addr_set(struct net_device *netdev, void *p)
268 {
269         struct be_adapter *adapter = netdev_priv(netdev);
270         struct device *dev = &adapter->pdev->dev;
271         struct sockaddr *addr = p;
272         int status;
273         u8 mac[ETH_ALEN];
274         u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
275
276         if (!is_valid_ether_addr(addr->sa_data))
277                 return -EADDRNOTAVAIL;
278
279         /* Proceed further only if the user-provided MAC is different
280          * from the active MAC
281          */
282         if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
283                 return 0;
284
285         /* if device is not running, copy MAC to netdev->dev_addr */
286         if (!netif_running(netdev))
287                 goto done;
288
289         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
290          * privilege or if PF did not provision the new MAC address.
291          * On BE3, this cmd will always fail if the VF doesn't have the
292          * FILTMGMT privilege. Such a failure is OK only if the PF has already
293          * programmed the MAC for the VF.
294          */
295         status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
296                                  adapter->if_handle, &adapter->pmac_id[0], 0);
297         if (!status) {
298                 curr_pmac_id = adapter->pmac_id[0];
299
300                 /* Delete the old programmed MAC. This call may fail if the
301                  * old MAC was already deleted by the PF driver.
302                  */
303                 if (adapter->pmac_id[0] != old_pmac_id)
304                         be_cmd_pmac_del(adapter, adapter->if_handle,
305                                         old_pmac_id, 0);
306         }
307
308         /* Decide if the new MAC is successfully activated only after
309          * querying the FW
310          */
311         status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
312                                        adapter->if_handle, true, 0);
313         if (status)
314                 goto err;
315
316         /* The MAC change did not happen, either due to lack of privilege
317          * or because the PF didn't pre-provision the new MAC.
318          */
319         if (!ether_addr_equal(addr->sa_data, mac)) {
320                 status = -EPERM;
321                 goto err;
322         }
323 done:
324         ether_addr_copy(netdev->dev_addr, addr->sa_data);
325         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
326         return 0;
327 err:
328         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
329         return status;
330 }
331
332 /* BE2 supports only v0 cmd */
333 static void *hw_stats_from_cmd(struct be_adapter *adapter)
334 {
335         if (BE2_chip(adapter)) {
336                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
337
338                 return &cmd->hw_stats;
339         } else if (BE3_chip(adapter)) {
340                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
341
342                 return &cmd->hw_stats;
343         } else {
344                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
345
346                 return &cmd->hw_stats;
347         }
348 }
349
350 /* BE2 supports only v0 cmd */
351 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
352 {
353         if (BE2_chip(adapter)) {
354                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
355
356                 return &hw_stats->erx;
357         } else if (BE3_chip(adapter)) {
358                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
359
360                 return &hw_stats->erx;
361         } else {
362                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
363
364                 return &hw_stats->erx;
365         }
366 }
367
368 static void populate_be_v0_stats(struct be_adapter *adapter)
369 {
370         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
371         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
372         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
373         struct be_port_rxf_stats_v0 *port_stats =
374                                         &rxf_stats->port[adapter->port_num];
375         struct be_drv_stats *drvs = &adapter->drv_stats;
376
377         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
378         drvs->rx_pause_frames = port_stats->rx_pause_frames;
379         drvs->rx_crc_errors = port_stats->rx_crc_errors;
380         drvs->rx_control_frames = port_stats->rx_control_frames;
381         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
382         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
383         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
384         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
385         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
386         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
387         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
388         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
389         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
390         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
391         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
392         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
393         drvs->rx_dropped_header_too_small =
394                 port_stats->rx_dropped_header_too_small;
395         drvs->rx_address_filtered =
396                                         port_stats->rx_address_filtered +
397                                         port_stats->rx_vlan_filtered;
398         drvs->rx_alignment_symbol_errors =
399                 port_stats->rx_alignment_symbol_errors;
400
401         drvs->tx_pauseframes = port_stats->tx_pauseframes;
402         drvs->tx_controlframes = port_stats->tx_controlframes;
403
404         if (adapter->port_num)
405                 drvs->jabber_events = rxf_stats->port1_jabber_events;
406         else
407                 drvs->jabber_events = rxf_stats->port0_jabber_events;
408         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
409         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
410         drvs->forwarded_packets = rxf_stats->forwarded_packets;
411         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
412         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
413         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
414         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
415 }
416
417 static void populate_be_v1_stats(struct be_adapter *adapter)
418 {
419         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
420         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
421         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
422         struct be_port_rxf_stats_v1 *port_stats =
423                                         &rxf_stats->port[adapter->port_num];
424         struct be_drv_stats *drvs = &adapter->drv_stats;
425
426         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
427         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
428         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
429         drvs->rx_pause_frames = port_stats->rx_pause_frames;
430         drvs->rx_crc_errors = port_stats->rx_crc_errors;
431         drvs->rx_control_frames = port_stats->rx_control_frames;
432         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
433         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
434         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
435         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
436         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
437         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
438         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
439         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
440         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
441         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
442         drvs->rx_dropped_header_too_small =
443                 port_stats->rx_dropped_header_too_small;
444         drvs->rx_input_fifo_overflow_drop =
445                 port_stats->rx_input_fifo_overflow_drop;
446         drvs->rx_address_filtered = port_stats->rx_address_filtered;
447         drvs->rx_alignment_symbol_errors =
448                 port_stats->rx_alignment_symbol_errors;
449         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
450         drvs->tx_pauseframes = port_stats->tx_pauseframes;
451         drvs->tx_controlframes = port_stats->tx_controlframes;
452         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
453         drvs->jabber_events = port_stats->jabber_events;
454         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
455         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
456         drvs->forwarded_packets = rxf_stats->forwarded_packets;
457         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
458         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
459         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
460         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
461 }
462
463 static void populate_be_v2_stats(struct be_adapter *adapter)
464 {
465         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
466         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
467         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
468         struct be_port_rxf_stats_v2 *port_stats =
469                                         &rxf_stats->port[adapter->port_num];
470         struct be_drv_stats *drvs = &adapter->drv_stats;
471
472         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
473         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
474         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
475         drvs->rx_pause_frames = port_stats->rx_pause_frames;
476         drvs->rx_crc_errors = port_stats->rx_crc_errors;
477         drvs->rx_control_frames = port_stats->rx_control_frames;
478         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
479         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
480         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
481         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
482         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
483         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
484         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
485         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
486         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
487         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
488         drvs->rx_dropped_header_too_small =
489                 port_stats->rx_dropped_header_too_small;
490         drvs->rx_input_fifo_overflow_drop =
491                 port_stats->rx_input_fifo_overflow_drop;
492         drvs->rx_address_filtered = port_stats->rx_address_filtered;
493         drvs->rx_alignment_symbol_errors =
494                 port_stats->rx_alignment_symbol_errors;
495         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
496         drvs->tx_pauseframes = port_stats->tx_pauseframes;
497         drvs->tx_controlframes = port_stats->tx_controlframes;
498         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
499         drvs->jabber_events = port_stats->jabber_events;
500         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
501         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
502         drvs->forwarded_packets = rxf_stats->forwarded_packets;
503         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
504         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
505         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
506         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
507         if (be_roce_supported(adapter)) {
508                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
509                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
510                 drvs->rx_roce_frames = port_stats->roce_frames_received;
511                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
512                 drvs->roce_drops_payload_len =
513                         port_stats->roce_drops_payload_len;
514         }
515 }
516
517 static void populate_lancer_stats(struct be_adapter *adapter)
518 {
519         struct be_drv_stats *drvs = &adapter->drv_stats;
520         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
521
522         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
523         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
524         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
525         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
526         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
527         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
528         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
529         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
530         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
531         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
532         drvs->rx_dropped_tcp_length =
533                                 pport_stats->rx_dropped_invalid_tcp_length;
534         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
535         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
536         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
537         drvs->rx_dropped_header_too_small =
538                                 pport_stats->rx_dropped_header_too_small;
539         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
540         drvs->rx_address_filtered =
541                                         pport_stats->rx_address_filtered +
542                                         pport_stats->rx_vlan_filtered;
543         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
544         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
545         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
546         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
547         drvs->jabber_events = pport_stats->rx_jabbers;
548         drvs->forwarded_packets = pport_stats->num_forwards_lo;
549         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
550         drvs->rx_drops_too_many_frags =
551                                 pport_stats->rx_drops_too_many_frags_lo;
552 }
553
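/* Fold a wrapping 16-bit HW counter into a 32-bit accumulator: the low 16
 * bits mirror the HW value, the high bits count the number of wrap-arounds.
 */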
554 static void accumulate_16bit_val(u32 *acc, u16 val)
555 {
556 #define lo(x)                   (x & 0xFFFF)
557 #define hi(x)                   (x & 0xFFFF0000)
558         bool wrapped = val < lo(*acc);
559         u32 newacc = hi(*acc) + val;
560
561         if (wrapped)
562                 newacc += 65536;
563         ACCESS_ONCE(*acc) = newacc;
564 }
565
566 static void populate_erx_stats(struct be_adapter *adapter,
567                                struct be_rx_obj *rxo, u32 erx_stat)
568 {
569         if (!BEx_chip(adapter))
570                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
571         else
572                 /* the erx HW counter below can wrap around after
573                  * 65535; the driver accumulates it into a 32-bit value
574                  */
575                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
576                                      (u16)erx_stat);
577 }
578
579 void be_parse_stats(struct be_adapter *adapter)
580 {
581         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
582         struct be_rx_obj *rxo;
583         int i;
584         u32 erx_stat;
585
586         if (lancer_chip(adapter)) {
587                 populate_lancer_stats(adapter);
588         } else {
589                 if (BE2_chip(adapter))
590                         populate_be_v0_stats(adapter);
591                 else if (BE3_chip(adapter))
592                         /* for BE3 */
593                         populate_be_v1_stats(adapter);
594                 else
595                         populate_be_v2_stats(adapter);
596
597                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
598                 for_all_rx_queues(adapter, rxo, i) {
599                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
600                         populate_erx_stats(adapter, rxo, erx_stat);
601                 }
602         }
603 }
604
605 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
606                                                 struct rtnl_link_stats64 *stats)
607 {
608         struct be_adapter *adapter = netdev_priv(netdev);
609         struct be_drv_stats *drvs = &adapter->drv_stats;
610         struct be_rx_obj *rxo;
611         struct be_tx_obj *txo;
612         u64 pkts, bytes;
613         unsigned int start;
614         int i;
615
616         for_all_rx_queues(adapter, rxo, i) {
617                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
618
619                 do {
620                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
621                         pkts = rx_stats(rxo)->rx_pkts;
622                         bytes = rx_stats(rxo)->rx_bytes;
623                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
624                 stats->rx_packets += pkts;
625                 stats->rx_bytes += bytes;
626                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
627                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
628                                         rx_stats(rxo)->rx_drops_no_frags;
629         }
630
631         for_all_tx_queues(adapter, txo, i) {
632                 const struct be_tx_stats *tx_stats = tx_stats(txo);
633
634                 do {
635                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
636                         pkts = tx_stats(txo)->tx_pkts;
637                         bytes = tx_stats(txo)->tx_bytes;
638                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
639                 stats->tx_packets += pkts;
640                 stats->tx_bytes += bytes;
641         }
642
643         /* bad pkts received */
644         stats->rx_errors = drvs->rx_crc_errors +
645                 drvs->rx_alignment_symbol_errors +
646                 drvs->rx_in_range_errors +
647                 drvs->rx_out_range_errors +
648                 drvs->rx_frame_too_long +
649                 drvs->rx_dropped_too_small +
650                 drvs->rx_dropped_too_short +
651                 drvs->rx_dropped_header_too_small +
652                 drvs->rx_dropped_tcp_length +
653                 drvs->rx_dropped_runt;
654
655         /* detailed rx errors */
656         stats->rx_length_errors = drvs->rx_in_range_errors +
657                 drvs->rx_out_range_errors +
658                 drvs->rx_frame_too_long;
659
660         stats->rx_crc_errors = drvs->rx_crc_errors;
661
662         /* frame alignment errors */
663         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
664
665         /* receiver fifo overrun */
666         /* drops_no_pbuf is not per i/f, it's per BE card */
667         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
668                                 drvs->rx_input_fifo_overflow_drop +
669                                 drvs->rx_drops_no_pbuf;
670         return stats;
671 }
672
673 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
674 {
675         struct net_device *netdev = adapter->netdev;
676
677         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
678                 netif_carrier_off(netdev);
679                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
680         }
681
682         if (link_status)
683                 netif_carrier_on(netdev);
684         else
685                 netif_carrier_off(netdev);
686
687         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
688 }
689
690 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
691 {
692         struct be_tx_stats *stats = tx_stats(txo);
693         u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
694
695         u64_stats_update_begin(&stats->sync);
696         stats->tx_reqs++;
697         stats->tx_bytes += skb->len;
698         stats->tx_pkts += tx_pkts;
699         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
700                 stats->tx_vxlan_offload_pkts += tx_pkts;
701         u64_stats_update_end(&stats->sync);
702 }
703
704 /* Returns number of WRBs needed for the skb */
705 static u32 skb_wrb_cnt(struct sk_buff *skb)
706 {
707         /* +1 for the header wrb */
708         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
709 }
710
711 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
712 {
713         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
714         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
715         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
716         wrb->rsvd0 = 0;
717 }
718
719 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
720  * to avoid the swap and shift/mask operations in wrb_fill().
721  */
722 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
723 {
724         wrb->frag_pa_hi = 0;
725         wrb->frag_pa_lo = 0;
726         wrb->frag_len = 0;
727         wrb->rsvd0 = 0;
728 }
729
730 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
731                                      struct sk_buff *skb)
732 {
733         u8 vlan_prio;
734         u16 vlan_tag;
735
736         vlan_tag = skb_vlan_tag_get(skb);
737         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
738         /* If the vlan priority provided by the OS is NOT in the available bmap */
739         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
740                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
741                                 adapter->recommended_prio_bits;
742
743         return vlan_tag;
744 }
745
746 /* Used only for IP tunnel packets */
747 static u16 skb_inner_ip_proto(struct sk_buff *skb)
748 {
749         return (inner_ip_hdr(skb)->version == 4) ?
750                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
751 }
752
753 static u16 skb_ip_proto(struct sk_buff *skb)
754 {
755         return (ip_hdr(skb)->version == 4) ?
756                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
757 }
758
759 static inline bool be_is_txq_full(struct be_tx_obj *txo)
760 {
761         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
762 }
763
764 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
765 {
766         return atomic_read(&txo->q.used) < txo->q.len / 2;
767 }
768
769 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
770 {
771         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
772 }
773
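/* Derive the TX WRB params (LSO, checksum offload, VLAN tag) for this skb */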
774 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
775                                        struct sk_buff *skb,
776                                        struct be_wrb_params *wrb_params)
777 {
778         u16 proto;
779
780         if (skb_is_gso(skb)) {
781                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
782                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
783                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
784                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
785         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
786                 if (skb->encapsulation) {
787                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
788                         proto = skb_inner_ip_proto(skb);
789                 } else {
790                         proto = skb_ip_proto(skb);
791                 }
792                 if (proto == IPPROTO_TCP)
793                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
794                 else if (proto == IPPROTO_UDP)
795                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
796         }
797
798         if (skb_vlan_tag_present(skb)) {
799                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
800                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
801         }
802
803         BE_WRB_F_SET(wrb_params->features, CRC, 1);
804 }
805
806 static void wrb_fill_hdr(struct be_adapter *adapter,
807                          struct be_eth_hdr_wrb *hdr,
808                          struct be_wrb_params *wrb_params,
809                          struct sk_buff *skb)
810 {
811         memset(hdr, 0, sizeof(*hdr));
812
813         SET_TX_WRB_HDR_BITS(crc, hdr,
814                             BE_WRB_F_GET(wrb_params->features, CRC));
815         SET_TX_WRB_HDR_BITS(ipcs, hdr,
816                             BE_WRB_F_GET(wrb_params->features, IPCS));
817         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
818                             BE_WRB_F_GET(wrb_params->features, TCPCS));
819         SET_TX_WRB_HDR_BITS(udpcs, hdr,
820                             BE_WRB_F_GET(wrb_params->features, UDPCS));
821
822         SET_TX_WRB_HDR_BITS(lso, hdr,
823                             BE_WRB_F_GET(wrb_params->features, LSO));
824         SET_TX_WRB_HDR_BITS(lso6, hdr,
825                             BE_WRB_F_GET(wrb_params->features, LSO6));
826         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
827
828         /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
829          * hack is not needed, the evt bit is set while ringing the DB.
830          */
831         SET_TX_WRB_HDR_BITS(event, hdr,
832                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
833         SET_TX_WRB_HDR_BITS(vlan, hdr,
834                             BE_WRB_F_GET(wrb_params->features, VLAN));
835         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
836
837         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
838         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
839         SET_TX_WRB_HDR_BITS(mgmt, hdr,
840                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
841 }
842
843 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
844                           bool unmap_single)
845 {
846         dma_addr_t dma;
847         u32 frag_len = le32_to_cpu(wrb->frag_len);
848
849
850         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
851                 (u64)le32_to_cpu(wrb->frag_pa_lo);
852         if (frag_len) {
853                 if (unmap_single)
854                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
855                 else
856                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
857         }
858 }
859
860 /* Grab a WRB header for xmit */
861 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
862 {
863         u32 head = txo->q.head;
864
865         queue_head_inc(&txo->q);
866         return head;
867 }
868
869 /* Set up the WRB header for xmit */
870 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
871                                 struct be_tx_obj *txo,
872                                 struct be_wrb_params *wrb_params,
873                                 struct sk_buff *skb, u16 head)
874 {
875         u32 num_frags = skb_wrb_cnt(skb);
876         struct be_queue_info *txq = &txo->q;
877         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
878
879         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
880         be_dws_cpu_to_le(hdr, sizeof(*hdr));
881
882         BUG_ON(txo->sent_skb_list[head]);
883         txo->sent_skb_list[head] = skb;
884         txo->last_req_hdr = head;
885         atomic_add(num_frags, &txq->used);
886         txo->last_req_wrb_cnt = num_frags;
887         txo->pend_wrb_cnt += num_frags;
888 }
889
890 /* Setup a WRB fragment (buffer descriptor) for xmit */
891 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
892                                  int len)
893 {
894         struct be_eth_wrb *wrb;
895         struct be_queue_info *txq = &txo->q;
896
897         wrb = queue_head_node(txq);
898         wrb_fill(wrb, busaddr, len);
899         queue_head_inc(txq);
900 }
901
902 /* Bring the queue back to the state it was in before the be_xmit_enqueue() routine
903  * was invoked. The producer index is restored to the previous packet and the
904  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
905  */
906 static void be_xmit_restore(struct be_adapter *adapter,
907                             struct be_tx_obj *txo, u32 head, bool map_single,
908                             u32 copied)
909 {
910         struct device *dev;
911         struct be_eth_wrb *wrb;
912         struct be_queue_info *txq = &txo->q;
913
914         dev = &adapter->pdev->dev;
915         txq->head = head;
916
917         /* skip the first wrb (hdr); it's not mapped */
918         queue_head_inc(txq);
919         while (copied) {
920                 wrb = queue_head_node(txq);
921                 unmap_tx_frag(dev, wrb, map_single);
922                 map_single = false;
923                 copied -= le32_to_cpu(wrb->frag_len);
924                 queue_head_inc(txq);
925         }
926
927         txq->head = head;
928 }
929
930 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
931  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
932  * of WRBs used up by the packet.
933  */
934 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
935                            struct sk_buff *skb,
936                            struct be_wrb_params *wrb_params)
937 {
938         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
939         struct device *dev = &adapter->pdev->dev;
940         struct be_queue_info *txq = &txo->q;
941         bool map_single = false;
942         u32 head = txq->head;
943         dma_addr_t busaddr;
944         int len;
945
946         head = be_tx_get_wrb_hdr(txo);
947
948         if (skb->len > skb->data_len) {
949                 len = skb_headlen(skb);
950
951                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
952                 if (dma_mapping_error(dev, busaddr))
953                         goto dma_err;
954                 map_single = true;
955                 be_tx_setup_wrb_frag(txo, busaddr, len);
956                 copied += len;
957         }
958
959         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
960                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
961                 len = skb_frag_size(frag);
962
963                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
964                 if (dma_mapping_error(dev, busaddr))
965                         goto dma_err;
966                 be_tx_setup_wrb_frag(txo, busaddr, len);
967                 copied += len;
968         }
969
970         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
971
972         be_tx_stats_update(txo, skb);
973         return wrb_cnt;
974
975 dma_err:
976         adapter->drv_stats.dma_map_errors++;
977         be_xmit_restore(adapter, txo, head, map_single, copied);
978         return 0;
979 }
980
981 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
982 {
983         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
984 }
985
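/* Insert the VLAN tag (and the outer QnQ VLAN, if any) inline into the pkt
 * instead of relying on HW VLAN tagging.
 */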
986 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
987                                              struct sk_buff *skb,
988                                              struct be_wrb_params
989                                              *wrb_params)
990 {
991         u16 vlan_tag = 0;
992
993         skb = skb_share_check(skb, GFP_ATOMIC);
994         if (unlikely(!skb))
995                 return skb;
996
997         if (skb_vlan_tag_present(skb))
998                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
999
1000         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1001                 if (!vlan_tag)
1002                         vlan_tag = adapter->pvid;
1003                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W to
1004                  * skip VLAN insertion
1005                  */
1006                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1007         }
1008
1009         if (vlan_tag) {
1010                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1011                                                 vlan_tag);
1012                 if (unlikely(!skb))
1013                         return skb;
1014                 skb->vlan_tci = 0;
1015         }
1016
1017         /* Insert the outer VLAN, if any */
1018         if (adapter->qnq_vid) {
1019                 vlan_tag = adapter->qnq_vid;
1020                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1021                                                 vlan_tag);
1022                 if (unlikely(!skb))
1023                         return skb;
1024                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1025         }
1026
1027         return skb;
1028 }
1029
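/* Check whether an IPv6 pkt carries an extension header with hdrlen 0xff;
 * such pkts can trigger the HW VLAN-tagging lockup worked around below.
 */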
1030 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1031 {
1032         struct ethhdr *eh = (struct ethhdr *)skb->data;
1033         u16 offset = ETH_HLEN;
1034
1035         if (eh->h_proto == htons(ETH_P_IPV6)) {
1036                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1037
1038                 offset += sizeof(struct ipv6hdr);
1039                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1040                     ip6h->nexthdr != NEXTHDR_UDP) {
1041                         struct ipv6_opt_hdr *ehdr =
1042                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1043
1044                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1045                         if (ehdr->hdrlen == 0xff)
1046                                 return true;
1047                 }
1048         }
1049         return false;
1050 }
1051
1052 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1053 {
1054         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1055 }
1056
1057 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1058 {
1059         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1060 }
1061
1062 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1063                                                   struct sk_buff *skb,
1064                                                   struct be_wrb_params
1065                                                   *wrb_params)
1066 {
1067         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1068         unsigned int eth_hdr_len;
1069         struct iphdr *ip;
1070
1071         /* For padded packets, BE HW modifies tot_len field in IP header
1072          * incorrectly when the VLAN tag is inserted by HW.
1073          * For padded packets, Lancer computes incorrect checksum.
1074          */
1075         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1076                                                 VLAN_ETH_HLEN : ETH_HLEN;
1077         if (skb->len <= 60 &&
1078             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1079             is_ipv4_pkt(skb)) {
1080                 ip = (struct iphdr *)ip_hdr(skb);
1081                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1082         }
1083
1084         /* If vlan tag is already inlined in the packet, skip HW VLAN
1085          * tagging in pvid-tagging mode
1086          */
1087         if (be_pvid_tagging_enabled(adapter) &&
1088             veh->h_vlan_proto == htons(ETH_P_8021Q))
1089                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1090
1091         /* HW has a bug wherein it will calculate CSUM for VLAN
1092          * pkts even though CSUM offload is disabled.
1093          * Manually insert the VLAN in the pkt.
1094          */
1095         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1096             skb_vlan_tag_present(skb)) {
1097                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1098                 if (unlikely(!skb))
1099                         goto err;
1100         }
1101
1102         /* HW may lockup when VLAN HW tagging is requested on
1103          * certain ipv6 packets. Drop such pkts if the HW workaround to
1104          * skip HW tagging is not enabled by FW.
1105          */
1106         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1107                      (adapter->pvid || adapter->qnq_vid) &&
1108                      !qnq_async_evt_rcvd(adapter)))
1109                 goto tx_drop;
1110
1111         /* Insert the VLAN tag manually to prevent an ASIC lockup
1112          * when the ASIC inserts a VLAN tag into certain ipv6
1113          * packets. Insert VLAN tags in the driver, and set the
1114          * event, completion and vlan bits accordingly in the
1115          * Tx WRB.
1116          */
1117         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1118             be_vlan_tag_tx_chk(adapter, skb)) {
1119                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1120                 if (unlikely(!skb))
1121                         goto err;
1122         }
1123
1124         return skb;
1125 tx_drop:
1126         dev_kfree_skb_any(skb);
1127 err:
1128         return NULL;
1129 }
1130
1131 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1132                                            struct sk_buff *skb,
1133                                            struct be_wrb_params *wrb_params)
1134 {
1135         int err;
1136
1137         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1138          * packets that are 32 bytes or less may cause a transmit stall
1139          * on that port. The workaround is to pad such packets
1140          * (len <= 32 bytes) to a minimum length of 36 bytes.
1141          */
1142         if (skb->len <= 32) {
1143                 if (skb_put_padto(skb, 36))
1144                         return NULL;
1145         }
1146
1147         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1148                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1149                 if (!skb)
1150                         return NULL;
1151         }
1152
1153         /* The stack can send us skbs with length greater than
1154          * what the HW can handle. Trim the extra bytes.
1155          */
1156         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1157         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1158         WARN_ON(err);
1159
1160         return skb;
1161 }
1162
1163 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1164 {
1165         struct be_queue_info *txq = &txo->q;
1166         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1167
1168         /* Mark the last request eventable if it hasn't been marked already */
1169         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1170                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1171
1172         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1173         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1174                 wrb_fill_dummy(queue_head_node(txq));
1175                 queue_head_inc(txq);
1176                 atomic_inc(&txq->used);
1177                 txo->pend_wrb_cnt++;
1178                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1179                                            TX_HDR_WRB_NUM_SHIFT);
1180                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1181                                           TX_HDR_WRB_NUM_SHIFT);
1182         }
1183         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1184         txo->pend_wrb_cnt = 0;
1185 }
1186
1187 /* OS2BMC related */
1188
1189 #define DHCP_CLIENT_PORT        68
1190 #define DHCP_SERVER_PORT        67
1191 #define NET_BIOS_PORT1          137
1192 #define NET_BIOS_PORT2          138
1193 #define DHCPV6_RAS_PORT         547
1194
1195 #define is_mc_allowed_on_bmc(adapter, eh)       \
1196         (!is_multicast_filt_enabled(adapter) && \
1197          is_multicast_ether_addr(eh->h_dest) && \
1198          !is_broadcast_ether_addr(eh->h_dest))
1199
1200 #define is_bc_allowed_on_bmc(adapter, eh)       \
1201         (!is_broadcast_filt_enabled(adapter) && \
1202          is_broadcast_ether_addr(eh->h_dest))
1203
1204 #define is_arp_allowed_on_bmc(adapter, skb)     \
1205         (is_arp(skb) && is_arp_filt_enabled(adapter))
1206
1207 #define is_broadcast_packet(eh, adapter)        \
1208                 (is_multicast_ether_addr(eh->h_dest) && \
1209                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1210
1211 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1212
1213 #define is_arp_filt_enabled(adapter)    \
1214                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1215
1216 #define is_dhcp_client_filt_enabled(adapter)    \
1217                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1218
1219 #define is_dhcp_srvr_filt_enabled(adapter)      \
1220                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1221
1222 #define is_nbios_filt_enabled(adapter)  \
1223                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1224
1225 #define is_ipv6_na_filt_enabled(adapter)        \
1226                 (adapter->bmc_filt_mask &       \
1227                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1228
1229 #define is_ipv6_ra_filt_enabled(adapter)        \
1230                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1231
1232 #define is_ipv6_ras_filt_enabled(adapter)       \
1233                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1234
1235 #define is_broadcast_filt_enabled(adapter)      \
1236                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1237
1238 #define is_multicast_filt_enabled(adapter)      \
1239                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1240
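/* Decide whether a copy of this TX pkt should also be sent to the BMC
 * (OS2BMC), based on the BMC filtering config; the VLAN tag, if any,
 * is inserted inline for such pkts.
 */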
1241 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1242                                struct sk_buff **skb)
1243 {
1244         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1245         bool os2bmc = false;
1246
1247         if (!be_is_os2bmc_enabled(adapter))
1248                 goto done;
1249
1250         if (!is_multicast_ether_addr(eh->h_dest))
1251                 goto done;
1252
1253         if (is_mc_allowed_on_bmc(adapter, eh) ||
1254             is_bc_allowed_on_bmc(adapter, eh) ||
1255             is_arp_allowed_on_bmc(adapter, (*skb))) {
1256                 os2bmc = true;
1257                 goto done;
1258         }
1259
1260         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1261                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1262                 u8 nexthdr = hdr->nexthdr;
1263
1264                 if (nexthdr == IPPROTO_ICMPV6) {
1265                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1266
1267                         switch (icmp6->icmp6_type) {
1268                         case NDISC_ROUTER_ADVERTISEMENT:
1269                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1270                                 goto done;
1271                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1272                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1273                                 goto done;
1274                         default:
1275                                 break;
1276                         }
1277                 }
1278         }
1279
1280         if (is_udp_pkt((*skb))) {
1281                 struct udphdr *udp = udp_hdr((*skb));
1282
1283                 switch (ntohs(udp->dest)) {
1284                 case DHCP_CLIENT_PORT:
1285                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1286                         goto done;
1287                 case DHCP_SERVER_PORT:
1288                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1289                         goto done;
1290                 case NET_BIOS_PORT1:
1291                 case NET_BIOS_PORT2:
1292                         os2bmc = is_nbios_filt_enabled(adapter);
1293                         goto done;
1294                 case DHCPV6_RAS_PORT:
1295                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1296                         goto done;
1297                 default:
1298                         break;
1299                 }
1300         }
1301 done:
1302         /* For packets over a vlan that are destined to the BMC,
1303          * the ASIC expects the vlan tag to be inline in the packet.
1304          */
1305         if (os2bmc)
1306                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1307
1308         return os2bmc;
1309 }
1310
1311 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1312 {
1313         struct be_adapter *adapter = netdev_priv(netdev);
1314         u16 q_idx = skb_get_queue_mapping(skb);
1315         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1316         struct be_wrb_params wrb_params = { 0 };
1317         bool flush = !skb->xmit_more;
1318         u16 wrb_cnt;
1319
1320         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1321         if (unlikely(!skb))
1322                 goto drop;
1323
1324         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1325
1326         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1327         if (unlikely(!wrb_cnt)) {
1328                 dev_kfree_skb_any(skb);
1329                 goto drop;
1330         }
1331
1332         /* If os2bmc is enabled and the pkt is destined to the BMC,
1333          * enqueue the pkt a 2nd time with the mgmt bit set.
1334          */
1335         if (be_send_pkt_to_bmc(adapter, &skb)) {
1336                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1337                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1338                 if (unlikely(!wrb_cnt))
1339                         goto drop;
1340                 else
1341                         skb_get(skb);
1342         }
1343
1344         if (be_is_txq_full(txo)) {
1345                 netif_stop_subqueue(netdev, q_idx);
1346                 tx_stats(txo)->tx_stops++;
1347         }
1348
1349         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1350                 be_xmit_flush(adapter, txo);
1351
1352         return NETDEV_TX_OK;
1353 drop:
1354         tx_stats(txo)->tx_drv_drops++;
1355         /* Flush the already enqueued tx requests */
1356         if (flush && txo->pend_wrb_cnt)
1357                 be_xmit_flush(adapter, txo);
1358
1359         return NETDEV_TX_OK;
1360 }
1361
1362 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1363 {
1364         struct be_adapter *adapter = netdev_priv(netdev);
1365         struct device *dev = &adapter->pdev->dev;
1366
1367         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1368                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1369                          BE_MIN_MTU, BE_MAX_MTU);
1370                 return -EINVAL;
1371         }
1372
1373         dev_info(dev, "MTU changed from %d to %d bytes\n",
1374                  netdev->mtu, new_mtu);
1375         netdev->mtu = new_mtu;
1376         return 0;
1377 }
1378
1379 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1380 {
1381         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1382                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1383 }
1384
1385 static int be_set_vlan_promisc(struct be_adapter *adapter)
1386 {
1387         struct device *dev = &adapter->pdev->dev;
1388         int status;
1389
1390         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1391                 return 0;
1392
1393         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1394         if (!status) {
1395                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1396                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1397         } else {
1398                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1399         }
1400         return status;
1401 }
1402
1403 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1404 {
1405         struct device *dev = &adapter->pdev->dev;
1406         int status;
1407
1408         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1409         if (!status) {
1410                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1411                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1412         }
1413         return status;
1414 }
1415
1416 /*
1417  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1418  * If the user configures more, place BE in vlan promiscuous mode.
1419  */
1420 static int be_vid_config(struct be_adapter *adapter)
1421 {
1422         struct device *dev = &adapter->pdev->dev;
1423         u16 vids[BE_NUM_VLANS_SUPPORTED];
1424         u16 num = 0, i = 0;
1425         int status = 0;
1426
1427         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1428         if (adapter->netdev->flags & IFF_PROMISC)
1429                 return 0;
1430
1431         if (adapter->vlans_added > be_max_vlans(adapter))
1432                 return be_set_vlan_promisc(adapter);
1433
1434         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1435                 status = be_clear_vlan_promisc(adapter);
1436                 if (status)
1437                         return status;
1438         }
1439         /* Construct VLAN Table to give to HW */
1440         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1441                 vids[num++] = cpu_to_le16(i);
1442
1443         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1444         if (status) {
1445                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1446                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1447                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1448                     addl_status(status) ==
1449                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1450                         return be_set_vlan_promisc(adapter);
1451         }
1452         return status;
1453 }
1454
1455 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1456 {
1457         struct be_adapter *adapter = netdev_priv(netdev);
1458         int status = 0;
1459
1460         mutex_lock(&adapter->rx_filter_lock);
1461
1462         /* Packets with VID 0 are always received by Lancer by default */
1463         if (lancer_chip(adapter) && vid == 0)
1464                 goto done;
1465
1466         if (test_bit(vid, adapter->vids))
1467                 goto done;
1468
1469         set_bit(vid, adapter->vids);
1470         adapter->vlans_added++;
1471
1472         status = be_vid_config(adapter);
1473 done:
1474         mutex_unlock(&adapter->rx_filter_lock);
1475         return status;
1476 }
1477
1478 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1479 {
1480         struct be_adapter *adapter = netdev_priv(netdev);
1481         int status = 0;
1482
1483         mutex_lock(&adapter->rx_filter_lock);
1484
1485         /* Packets with VID 0 are always received by Lancer by default */
1486         if (lancer_chip(adapter) && vid == 0)
1487                 goto done;
1488
1489         if (!test_bit(vid, adapter->vids))
1490                 goto done;
1491
1492         clear_bit(vid, adapter->vids);
1493         adapter->vlans_added--;
1494
1495         status = be_vid_config(adapter);
1496 done:
1497         mutex_unlock(&adapter->rx_filter_lock);
1498         return status;
1499 }
1500
1501 static void be_set_all_promisc(struct be_adapter *adapter)
1502 {
1503         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1504         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1505 }
1506
1507 static void be_set_mc_promisc(struct be_adapter *adapter)
1508 {
1509         int status;
1510
1511         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1512                 return;
1513
1514         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1515         if (!status)
1516                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1517 }
1518
1519 static void be_set_uc_promisc(struct be_adapter *adapter)
1520 {
1521         int status;
1522
1523         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1524                 return;
1525
1526         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1527         if (!status)
1528                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1529 }
1530
1531 static void be_clear_uc_promisc(struct be_adapter *adapter)
1532 {
1533         int status;
1534
1535         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1536                 return;
1537
1538         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1539         if (!status)
1540                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1541 }
1542
1543 /* The two functions below are the callback args for __dev_mc_sync()/__dev_uc_sync().
1544  * We use a single callback function for both sync and unsync. We don't really
1545  * add/remove addresses through this callback; we only use it to detect changes
1546  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1547  */
1548 static int be_uc_list_update(struct net_device *netdev,
1549                              const unsigned char *addr)
1550 {
1551         struct be_adapter *adapter = netdev_priv(netdev);
1552
1553         adapter->update_uc_list = true;
1554         return 0;
1555 }
1556
1557 static int be_mc_list_update(struct net_device *netdev,
1558                              const unsigned char *addr)
1559 {
1560         struct be_adapter *adapter = netdev_priv(netdev);
1561
1562         adapter->update_mc_list = true;
1563         return 0;
1564 }
1565
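/* Sync the kernel's multicast list into the adapter under the netdev addr
 * lock. Fall back to multicast-promiscuous mode when IFF_ALLMULTI is set or
 * the list exceeds be_max_mc(); otherwise cache the list in the adapter and
 * issue the MULTICAST rx-filter command, reverting to mc-promisc on failure.
 */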
1566 static void be_set_mc_list(struct be_adapter *adapter)
1567 {
1568         struct net_device *netdev = adapter->netdev;
1569         struct netdev_hw_addr *ha;
1570         bool mc_promisc = false;
1571         int status;
1572
1573         netif_addr_lock_bh(netdev);
1574         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1575
1576         if (netdev->flags & IFF_PROMISC) {
1577                 adapter->update_mc_list = false;
1578         } else if (netdev->flags & IFF_ALLMULTI ||
1579                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1580                 /* Enable multicast promisc if num configured exceeds
1581                  * what we support
1582                  */
1583                 mc_promisc = true;
1584                 adapter->update_mc_list = false;
1585         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1586                 /* Update mc-list unconditionally if the iface was previously
1587                  * in mc-promisc mode and now is out of that mode.
1588                  */
1589                 adapter->update_mc_list = true;
1590         }
1591
1592         if (adapter->update_mc_list) {
1593                 int i = 0;
1594
1595                 /* cache the mc-list in adapter */
1596                 netdev_for_each_mc_addr(ha, netdev) {
1597                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1598                         i++;
1599                 }
1600                 adapter->mc_count = netdev_mc_count(netdev);
1601         }
1602         netif_addr_unlock_bh(netdev);
1603
1604         if (mc_promisc) {
1605                 be_set_mc_promisc(adapter);
1606         } else if (adapter->update_mc_list) {
1607                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1608                 if (!status)
1609                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1610                 else
1611                         be_set_mc_promisc(adapter);
1612
1613                 adapter->update_mc_list = false;
1614         }
1615 }
1616
1617 static void be_clear_mc_list(struct be_adapter *adapter)
1618 {
1619         struct net_device *netdev = adapter->netdev;
1620
1621         __dev_mc_unsync(netdev, NULL);
1622         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1623         adapter->mc_count = 0;
1624 }
1625
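/* Sync the kernel's unicast list into the adapter. Enter uc-promiscuous mode
 * when the list cannot fit in the available pmac slots (slot 0 is reserved
 * for the primary MAC); otherwise clear uc-promisc, delete the previously
 * programmed pmac entries and re-add the cached list via pmac_add.
 */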
1626 static void be_set_uc_list(struct be_adapter *adapter)
1627 {
1628         struct net_device *netdev = adapter->netdev;
1629         struct netdev_hw_addr *ha;
1630         bool uc_promisc = false;
1631         int curr_uc_macs = 0, i;
1632
1633         netif_addr_lock_bh(netdev);
1634         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1635
1636         if (netdev->flags & IFF_PROMISC) {
1637                 adapter->update_uc_list = false;
1638         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1639                 uc_promisc = true;
1640                 adapter->update_uc_list = false;
1641         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1642                 /* Update uc-list unconditionally if the iface was previously
1643                  * in uc-promisc mode and now is out of that mode.
1644                  */
1645                 adapter->update_uc_list = true;
1646         }
1647
1648         if (adapter->update_uc_list) {
1649                 i = 1; /* First slot is claimed by the Primary MAC */
1650
1651                 /* cache the uc-list in adapter array */
1652                 netdev_for_each_uc_addr(ha, netdev) {
1653                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1654                         i++;
1655                 }
1656                 curr_uc_macs = netdev_uc_count(netdev);
1657         }
1658         netif_addr_unlock_bh(netdev);
1659
1660         if (uc_promisc) {
1661                 be_set_uc_promisc(adapter);
1662         } else if (adapter->update_uc_list) {
1663                 be_clear_uc_promisc(adapter);
1664
1665                 for (i = 0; i < adapter->uc_macs; i++)
1666                         be_cmd_pmac_del(adapter, adapter->if_handle,
1667                                         adapter->pmac_id[i + 1], 0);
1668
1669                 for (i = 0; i < curr_uc_macs; i++)
1670                         be_cmd_pmac_add(adapter, adapter->uc_list[i].mac,
1671                                         adapter->if_handle,
1672                                         &adapter->pmac_id[i + 1], 0);
1673                 adapter->uc_macs = curr_uc_macs;
1674                 adapter->update_uc_list = false;
1675         }
1676 }
1677
1678 static void be_clear_uc_list(struct be_adapter *adapter)
1679 {
1680         struct net_device *netdev = adapter->netdev;
1681         int i;
1682
1683         __dev_uc_unsync(netdev, NULL);
1684         for (i = 0; i < adapter->uc_macs; i++)
1685                 be_cmd_pmac_del(adapter, adapter->if_handle,
1686                                 adapter->pmac_id[i + 1], 0);
1687         adapter->uc_macs = 0;
1688 }
1689
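/* Program the complete RX filter state (promisc, VLAN, uc and mc lists)
 * under rx_filter_lock. Invoked from worker context via
 * be_work_set_rx_mode().
 */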
1690 static void __be_set_rx_mode(struct be_adapter *adapter)
1691 {
1692         struct net_device *netdev = adapter->netdev;
1693
1694         mutex_lock(&adapter->rx_filter_lock);
1695
1696         if (netdev->flags & IFF_PROMISC) {
1697                 if (!be_in_all_promisc(adapter))
1698                         be_set_all_promisc(adapter);
1699         } else if (be_in_all_promisc(adapter)) {
1700                 /* We need to re-program the vlan-list or clear
1701                  * vlan-promisc mode (if needed) when the interface
1702                  * comes out of promisc mode.
1703                  */
1704                 be_vid_config(adapter);
1705         }
1706
1707         be_set_uc_list(adapter);
1708         be_set_mc_list(adapter);
1709
1710         mutex_unlock(&adapter->rx_filter_lock);
1711 }
1712
1713 static void be_work_set_rx_mode(struct work_struct *work)
1714 {
1715         struct be_cmd_work *cmd_work =
1716                                 container_of(work, struct be_cmd_work, work);
1717
1718         __be_set_rx_mode(cmd_work->adapter);
1719         kfree(cmd_work);
1720 }
1721
1722 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1723 {
1724         struct be_adapter *adapter = netdev_priv(netdev);
1725         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1726         int status;
1727
1728         if (!sriov_enabled(adapter))
1729                 return -EPERM;
1730
1731         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1732                 return -EINVAL;
1733
1734         /* Proceed further only if the user-provided MAC is different
1735          * from the active MAC
1736          */
1737         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1738                 return 0;
1739
1740         if (BEx_chip(adapter)) {
1741                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1742                                 vf + 1);
1743
1744                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1745                                          &vf_cfg->pmac_id, vf + 1);
1746         } else {
1747                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1748                                         vf + 1);
1749         }
1750
1751         if (status) {
1752                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1753                         mac, vf, status);
1754                 return be_cmd_status(status);
1755         }
1756
1757         ether_addr_copy(vf_cfg->mac_addr, mac);
1758
1759         return 0;
1760 }
1761
1762 static int be_get_vf_config(struct net_device *netdev, int vf,
1763                             struct ifla_vf_info *vi)
1764 {
1765         struct be_adapter *adapter = netdev_priv(netdev);
1766         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1767
1768         if (!sriov_enabled(adapter))
1769                 return -EPERM;
1770
1771         if (vf >= adapter->num_vfs)
1772                 return -EINVAL;
1773
1774         vi->vf = vf;
1775         vi->max_tx_rate = vf_cfg->tx_rate;
1776         vi->min_tx_rate = 0;
1777         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1778         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1779         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1780         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1781         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1782
1783         return 0;
1784 }
1785
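/* Enable Transparent VLAN Tagging (TVT) on a VF: program the VLAN via the
 * hsw config command, clear any guest-programmed VLAN filters, and revoke
 * the VF's FILTMGMT privilege so it can no longer program its own filters.
 */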
1786 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1787 {
1788         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1789         u16 vids[BE_NUM_VLANS_SUPPORTED];
1790         int vf_if_id = vf_cfg->if_handle;
1791         int status;
1792
1793         /* Enable Transparent VLAN Tagging */
1794         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1795         if (status)
1796                 return status;
1797
1798         /* If TVT is enabled, clear any pre-programmed VLAN filters on the VF */
1799         vids[0] = 0;
1800         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1801         if (!status)
1802                 dev_info(&adapter->pdev->dev,
1803                          "Cleared guest VLANs on VF%d", vf);
1804
1805         /* After TVT is enabled, disallow VFs to program VLAN filters */
1806         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1807                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1808                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1809                 if (!status)
1810                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1811         }
1812         return 0;
1813 }
1814
1815 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1816 {
1817         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1818         struct device *dev = &adapter->pdev->dev;
1819         int status;
1820
1821         /* Reset Transparent VLAN Tagging. */
1822         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1823                                        vf_cfg->if_handle, 0, 0);
1824         if (status)
1825                 return status;
1826
1827         /* Allow VFs to program VLAN filtering */
1828         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1829                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1830                                                   BE_PRIV_FILTMGMT, vf + 1);
1831                 if (!status) {
1832                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1833                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1834                 }
1835         }
1836
1837         dev_info(dev,
1838                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1839         return 0;
1840 }
1841
1842 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1843 {
1844         struct be_adapter *adapter = netdev_priv(netdev);
1845         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1846         int status;
1847
1848         if (!sriov_enabled(adapter))
1849                 return -EPERM;
1850
1851         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1852                 return -EINVAL;
1853
1854         if (vlan || qos) {
1855                 vlan |= qos << VLAN_PRIO_SHIFT;
1856                 status = be_set_vf_tvt(adapter, vf, vlan);
1857         } else {
1858                 status = be_clear_vf_tvt(adapter, vf);
1859         }
1860
1861         if (status) {
1862                 dev_err(&adapter->pdev->dev,
1863                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1864                         status);
1865                 return be_cmd_status(status);
1866         }
1867
1868         vf_cfg->vlan_tag = vlan;
1869         return 0;
1870 }
1871
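/* Set a per-VF TX rate limit; min_tx_rate is not supported. A non-zero rate
 * must lie between 100 Mbps and the current link speed. On Skyhawk the rate
 * is programmed as a percentage of link speed, so it must also be a multiple
 * of link_speed/100 (e.g. on a 10000 Mbps link, a multiple of 100 Mbps).
 */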
1872 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1873                              int min_tx_rate, int max_tx_rate)
1874 {
1875         struct be_adapter *adapter = netdev_priv(netdev);
1876         struct device *dev = &adapter->pdev->dev;
1877         int percent_rate, status = 0;
1878         u16 link_speed = 0;
1879         u8 link_status;
1880
1881         if (!sriov_enabled(adapter))
1882                 return -EPERM;
1883
1884         if (vf >= adapter->num_vfs)
1885                 return -EINVAL;
1886
1887         if (min_tx_rate)
1888                 return -EINVAL;
1889
1890         if (!max_tx_rate)
1891                 goto config_qos;
1892
1893         status = be_cmd_link_status_query(adapter, &link_speed,
1894                                           &link_status, 0);
1895         if (status)
1896                 goto err;
1897
1898         if (!link_status) {
1899                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1900                 status = -ENETDOWN;
1901                 goto err;
1902         }
1903
1904         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1905                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1906                         link_speed);
1907                 status = -EINVAL;
1908                 goto err;
1909         }
1910
1911         /* On Skyhawk the QOS setting must be done only as a % value */
1912         percent_rate = link_speed / 100;
1913         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1914                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1915                         percent_rate);
1916                 status = -EINVAL;
1917                 goto err;
1918         }
1919
1920 config_qos:
1921         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1922         if (status)
1923                 goto err;
1924
1925         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1926         return 0;
1927
1928 err:
1929         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1930                 max_tx_rate, vf);
1931         return be_cmd_status(status);
1932 }
1933
1934 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1935                                 int link_state)
1936 {
1937         struct be_adapter *adapter = netdev_priv(netdev);
1938         int status;
1939
1940         if (!sriov_enabled(adapter))
1941                 return -EPERM;
1942
1943         if (vf >= adapter->num_vfs)
1944                 return -EINVAL;
1945
1946         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1947         if (status) {
1948                 dev_err(&adapter->pdev->dev,
1949                         "Link state change on VF %d failed: %#x\n", vf, status);
1950                 return be_cmd_status(status);
1951         }
1952
1953         adapter->vf_cfg[vf].plink_tracking = link_state;
1954
1955         return 0;
1956 }
1957
1958 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
1959 {
1960         struct be_adapter *adapter = netdev_priv(netdev);
1961         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1962         u8 spoofchk;
1963         int status;
1964
1965         if (!sriov_enabled(adapter))
1966                 return -EPERM;
1967
1968         if (vf >= adapter->num_vfs)
1969                 return -EINVAL;
1970
1971         if (BEx_chip(adapter))
1972                 return -EOPNOTSUPP;
1973
1974         if (enable == vf_cfg->spoofchk)
1975                 return 0;
1976
1977         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
1978
1979         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
1980                                        0, spoofchk);
1981         if (status) {
1982                 dev_err(&adapter->pdev->dev,
1983                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
1984                 return be_cmd_status(status);
1985         }
1986
1987         vf_cfg->spoofchk = enable;
1988         return 0;
1989 }
1990
1991 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
1992                           ulong now)
1993 {
1994         aic->rx_pkts_prev = rx_pkts;
1995         aic->tx_reqs_prev = tx_pkts;
1996         aic->jiffies = now;
1997 }
1998
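/* Compute a new interrupt-coalescing delay for this EQ from the RX/TX
 * packet rate seen since the last sample:
 *   pps = (rx_delta * 1000 / delta_ms) + (tx_delta * 1000 / delta_ms)
 *   eqd = (pps / 15000) << 2, zeroed below 8 and clamped to
 *         [aic->min_eqd, aic->max_eqd]
 * For example, ~300000 pps gives (300000 / 15000) << 2 = 80 before clamping.
 * When adaptive coalescing is disabled, the ethtool-set value (aic->et_eqd)
 * is returned instead.
 */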
1999 static int be_get_new_eqd(struct be_eq_obj *eqo)
2000 {
2001         struct be_adapter *adapter = eqo->adapter;
2002         int eqd, start;
2003         struct be_aic_obj *aic;
2004         struct be_rx_obj *rxo;
2005         struct be_tx_obj *txo;
2006         u64 rx_pkts = 0, tx_pkts = 0;
2007         ulong now;
2008         u32 pps, delta;
2009         int i;
2010
2011         aic = &adapter->aic_obj[eqo->idx];
2012         if (!aic->enable) {
2013                 if (aic->jiffies)
2014                         aic->jiffies = 0;
2015                 eqd = aic->et_eqd;
2016                 return eqd;
2017         }
2018
2019         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2020                 do {
2021                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2022                         rx_pkts += rxo->stats.rx_pkts;
2023                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2024         }
2025
2026         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2027                 do {
2028                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2029                         tx_pkts += txo->stats.tx_reqs;
2030                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2031         }
2032
2033         /* Skip if the counters wrapped around or this is the first calculation */
2034         now = jiffies;
2035         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2036             rx_pkts < aic->rx_pkts_prev ||
2037             tx_pkts < aic->tx_reqs_prev) {
2038                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2039                 return aic->prev_eqd;
2040         }
2041
2042         delta = jiffies_to_msecs(now - aic->jiffies);
2043         if (delta == 0)
2044                 return aic->prev_eqd;
2045
2046         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2047                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2048         eqd = (pps / 15000) << 2;
2049
2050         if (eqd < 8)
2051                 eqd = 0;
2052         eqd = min_t(u32, eqd, aic->max_eqd);
2053         eqd = max_t(u32, eqd, aic->min_eqd);
2054
2055         be_aic_update(aic, rx_pkts, tx_pkts, now);
2056
2057         return eqd;
2058 }
2059
2060 /* For Skyhawk-R only */
2061 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2062 {
2063         struct be_adapter *adapter = eqo->adapter;
2064         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2065         ulong now = jiffies;
2066         int eqd;
2067         u32 mult_enc;
2068
2069         if (!aic->enable)
2070                 return 0;
2071
2072         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2073                 eqd = aic->prev_eqd;
2074         else
2075                 eqd = be_get_new_eqd(eqo);
2076
2077         if (eqd > 100)
2078                 mult_enc = R2I_DLY_ENC_1;
2079         else if (eqd > 60)
2080                 mult_enc = R2I_DLY_ENC_2;
2081         else if (eqd > 20)
2082                 mult_enc = R2I_DLY_ENC_3;
2083         else
2084                 mult_enc = R2I_DLY_ENC_0;
2085
2086         aic->prev_eqd = eqd;
2087
2088         return mult_enc;
2089 }
2090
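/* Recompute the EQ delay for every event queue and, for those whose value
 * changed (or unconditionally when force_update is set), issue a single
 * be_cmd_modify_eqd() call. The value handed to firmware is scaled as
 * delay_multiplier = (eqd * 65) / 100.
 */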
2091 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2092 {
2093         struct be_set_eqd set_eqd[MAX_EVT_QS];
2094         struct be_aic_obj *aic;
2095         struct be_eq_obj *eqo;
2096         int i, num = 0, eqd;
2097
2098         for_all_evt_queues(adapter, eqo, i) {
2099                 aic = &adapter->aic_obj[eqo->idx];
2100                 eqd = be_get_new_eqd(eqo);
2101                 if (force_update || eqd != aic->prev_eqd) {
2102                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2103                         set_eqd[num].eq_id = eqo->q.id;
2104                         aic->prev_eqd = eqd;
2105                         num++;
2106                 }
2107         }
2108
2109         if (num)
2110                 be_cmd_modify_eqd(adapter, set_eqd, num);
2111 }
2112
2113 static void be_rx_stats_update(struct be_rx_obj *rxo,
2114                                struct be_rx_compl_info *rxcp)
2115 {
2116         struct be_rx_stats *stats = rx_stats(rxo);
2117
2118         u64_stats_update_begin(&stats->sync);
2119         stats->rx_compl++;
2120         stats->rx_bytes += rxcp->pkt_size;
2121         stats->rx_pkts++;
2122         if (rxcp->tunneled)
2123                 stats->rx_vxlan_offload_pkts++;
2124         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2125                 stats->rx_mcast_pkts++;
2126         if (rxcp->err)
2127                 stats->rx_compl_err++;
2128         u64_stats_update_end(&stats->sync);
2129 }
2130
2131 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2132 {
2133         /* L4 checksum is not reliable for non-TCP/UDP packets.
2134          * Also ignore ipcksm for IPv6 pkts.
2135          */
2136         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2137                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2138 }
2139
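/* Pop the page_info at the RXQ tail. The backing page is DMA-unmapped when
 * this fragment is the last one carved from it; otherwise only the fragment
 * is synced for CPU access. Advances the RXQ tail and drops its used count.
 */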
2140 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2141 {
2142         struct be_adapter *adapter = rxo->adapter;
2143         struct be_rx_page_info *rx_page_info;
2144         struct be_queue_info *rxq = &rxo->q;
2145         u32 frag_idx = rxq->tail;
2146
2147         rx_page_info = &rxo->page_info_tbl[frag_idx];
2148         BUG_ON(!rx_page_info->page);
2149
2150         if (rx_page_info->last_frag) {
2151                 dma_unmap_page(&adapter->pdev->dev,
2152                                dma_unmap_addr(rx_page_info, bus),
2153                                adapter->big_page_size, DMA_FROM_DEVICE);
2154                 rx_page_info->last_frag = false;
2155         } else {
2156                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2157                                         dma_unmap_addr(rx_page_info, bus),
2158                                         rx_frag_size, DMA_FROM_DEVICE);
2159         }
2160
2161         queue_tail_inc(rxq);
2162         atomic_dec(&rxq->used);
2163         return rx_page_info;
2164 }
2165
2166 /* Throw away the data in the Rx completion */
2167 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2168                                 struct be_rx_compl_info *rxcp)
2169 {
2170         struct be_rx_page_info *page_info;
2171         u16 i, num_rcvd = rxcp->num_rcvd;
2172
2173         for (i = 0; i < num_rcvd; i++) {
2174                 page_info = get_rx_page_info(rxo);
2175                 put_page(page_info->page);
2176                 memset(page_info, 0, sizeof(*page_info));
2177         }
2178 }
2179
2180 /*
2181  * skb_fill_rx_data forms a complete skb for an ether frame
2182  * indicated by rxcp.
2183  */
2184 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2185                              struct be_rx_compl_info *rxcp)
2186 {
2187         struct be_rx_page_info *page_info;
2188         u16 i, j;
2189         u16 hdr_len, curr_frag_len, remaining;
2190         u8 *start;
2191
2192         page_info = get_rx_page_info(rxo);
2193         start = page_address(page_info->page) + page_info->page_offset;
2194         prefetch(start);
2195
2196         /* Copy data in the first descriptor of this completion */
2197         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2198
2199         skb->len = curr_frag_len;
2200         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2201                 memcpy(skb->data, start, curr_frag_len);
2202                 /* Complete packet has now been moved to data */
2203                 put_page(page_info->page);
2204                 skb->data_len = 0;
2205                 skb->tail += curr_frag_len;
2206         } else {
2207                 hdr_len = ETH_HLEN;
2208                 memcpy(skb->data, start, hdr_len);
2209                 skb_shinfo(skb)->nr_frags = 1;
2210                 skb_frag_set_page(skb, 0, page_info->page);
2211                 skb_shinfo(skb)->frags[0].page_offset =
2212                                         page_info->page_offset + hdr_len;
2213                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2214                                   curr_frag_len - hdr_len);
2215                 skb->data_len = curr_frag_len - hdr_len;
2216                 skb->truesize += rx_frag_size;
2217                 skb->tail += hdr_len;
2218         }
2219         page_info->page = NULL;
2220
2221         if (rxcp->pkt_size <= rx_frag_size) {
2222                 BUG_ON(rxcp->num_rcvd != 1);
2223                 return;
2224         }
2225
2226         /* More frags present for this completion */
2227         remaining = rxcp->pkt_size - curr_frag_len;
2228         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2229                 page_info = get_rx_page_info(rxo);
2230                 curr_frag_len = min(remaining, rx_frag_size);
2231
2232                 /* Coalesce all frags from the same physical page in one slot */
2233                 if (page_info->page_offset == 0) {
2234                         /* Fresh page */
2235                         j++;
2236                         skb_frag_set_page(skb, j, page_info->page);
2237                         skb_shinfo(skb)->frags[j].page_offset =
2238                                                         page_info->page_offset;
2239                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2240                         skb_shinfo(skb)->nr_frags++;
2241                 } else {
2242                         put_page(page_info->page);
2243                 }
2244
2245                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2246                 skb->len += curr_frag_len;
2247                 skb->data_len += curr_frag_len;
2248                 skb->truesize += rx_frag_size;
2249                 remaining -= curr_frag_len;
2250                 page_info->page = NULL;
2251         }
2252         BUG_ON(j > MAX_SKB_FRAGS);
2253 }
2254
2255 /* Process the RX completion indicated by rxcp when GRO is disabled */
2256 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2257                                 struct be_rx_compl_info *rxcp)
2258 {
2259         struct be_adapter *adapter = rxo->adapter;
2260         struct net_device *netdev = adapter->netdev;
2261         struct sk_buff *skb;
2262
2263         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2264         if (unlikely(!skb)) {
2265                 rx_stats(rxo)->rx_drops_no_skbs++;
2266                 be_rx_compl_discard(rxo, rxcp);
2267                 return;
2268         }
2269
2270         skb_fill_rx_data(rxo, skb, rxcp);
2271
2272         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2273                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2274         else
2275                 skb_checksum_none_assert(skb);
2276
2277         skb->protocol = eth_type_trans(skb, netdev);
2278         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2279         if (netdev->features & NETIF_F_RXHASH)
2280                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2281
2282         skb->csum_level = rxcp->tunneled;
2283         skb_mark_napi_id(skb, napi);
2284
2285         if (rxcp->vlanf)
2286                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2287
2288         netif_receive_skb(skb);
2289 }
2290
2291 /* Process the RX completion indicated by rxcp when GRO is enabled */
2292 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2293                                     struct napi_struct *napi,
2294                                     struct be_rx_compl_info *rxcp)
2295 {
2296         struct be_adapter *adapter = rxo->adapter;
2297         struct be_rx_page_info *page_info;
2298         struct sk_buff *skb = NULL;
2299         u16 remaining, curr_frag_len;
2300         u16 i, j;
2301
2302         skb = napi_get_frags(napi);
2303         if (!skb) {
2304                 be_rx_compl_discard(rxo, rxcp);
2305                 return;
2306         }
2307
2308         remaining = rxcp->pkt_size;
2309         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2310                 page_info = get_rx_page_info(rxo);
2311
2312                 curr_frag_len = min(remaining, rx_frag_size);
2313
2314                 /* Coalesce all frags from the same physical page in one slot */
2315                 if (i == 0 || page_info->page_offset == 0) {
2316                         /* First frag or Fresh page */
2317                         j++;
2318                         skb_frag_set_page(skb, j, page_info->page);
2319                         skb_shinfo(skb)->frags[j].page_offset =
2320                                                         page_info->page_offset;
2321                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2322                 } else {
2323                         put_page(page_info->page);
2324                 }
2325                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2326                 skb->truesize += rx_frag_size;
2327                 remaining -= curr_frag_len;
2328                 memset(page_info, 0, sizeof(*page_info));
2329         }
2330         BUG_ON(j > MAX_SKB_FRAGS);
2331
2332         skb_shinfo(skb)->nr_frags = j + 1;
2333         skb->len = rxcp->pkt_size;
2334         skb->data_len = rxcp->pkt_size;
2335         skb->ip_summed = CHECKSUM_UNNECESSARY;
2336         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2337         if (adapter->netdev->features & NETIF_F_RXHASH)
2338                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2339
2340         skb->csum_level = rxcp->tunneled;
2341
2342         if (rxcp->vlanf)
2343                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2344
2345         napi_gro_frags(napi);
2346 }
2347
2348 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2349                                  struct be_rx_compl_info *rxcp)
2350 {
2351         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2352         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2353         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2354         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2355         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2356         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2357         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2358         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2359         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2360         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2361         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2362         if (rxcp->vlanf) {
2363                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2364                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2365         }
2366         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2367         rxcp->tunneled =
2368                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2369 }
2370
2371 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2372                                  struct be_rx_compl_info *rxcp)
2373 {
2374         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2375         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2376         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2377         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2378         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2379         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2380         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2381         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2382         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2383         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2384         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2385         if (rxcp->vlanf) {
2386                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2387                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2388         }
2389         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2390         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2391 }
2392
2393 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2394 {
2395         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2396         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2397         struct be_adapter *adapter = rxo->adapter;
2398
2399         /* For checking the valid bit it is OK to use either definition, as the
2400          * valid bit is at the same position in both v0 and v1 Rx compl */
2401         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2402                 return NULL;
2403
2404         rmb();
2405         be_dws_le_to_cpu(compl, sizeof(*compl));
2406
2407         if (adapter->be3_native)
2408                 be_parse_rx_compl_v1(compl, rxcp);
2409         else
2410                 be_parse_rx_compl_v0(compl, rxcp);
2411
2412         if (rxcp->ip_frag)
2413                 rxcp->l4_csum = 0;
2414
2415         if (rxcp->vlanf) {
2416                 /* In QNQ modes, if qnq bit is not set, then the packet was
2417                  * tagged only with the transparent outer vlan-tag and must
2418                  * not be treated as a vlan packet by host
2419                  */
2420                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2421                         rxcp->vlanf = 0;
2422
2423                 if (!lancer_chip(adapter))
2424                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2425
2426                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2427                     !test_bit(rxcp->vlan_tag, adapter->vids))
2428                         rxcp->vlanf = 0;
2429         }
2430
2431         /* As the compl has been parsed, reset it; we won't touch it again */
2432         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2433
2434         queue_tail_inc(&rxo->cq);
2435         return rxcp;
2436 }
2437
2438 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2439 {
2440         u32 order = get_order(size);
2441
2442         if (order > 0)
2443                 gfp |= __GFP_COMP;
2444         return  alloc_pages(gfp, order);
2445 }
2446
2447 /*
2448  * Allocate a page, split it into fragments of size rx_frag_size, and post them as
2449  * receive buffers to BE
2450  */
2451 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2452 {
2453         struct be_adapter *adapter = rxo->adapter;
2454         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2455         struct be_queue_info *rxq = &rxo->q;
2456         struct page *pagep = NULL;
2457         struct device *dev = &adapter->pdev->dev;
2458         struct be_eth_rx_d *rxd;
2459         u64 page_dmaaddr = 0, frag_dmaaddr;
2460         u32 posted, page_offset = 0, notify = 0;
2461
2462         page_info = &rxo->page_info_tbl[rxq->head];
2463         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2464                 if (!pagep) {
2465                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2466                         if (unlikely(!pagep)) {
2467                                 rx_stats(rxo)->rx_post_fail++;
2468                                 break;
2469                         }
2470                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2471                                                     adapter->big_page_size,
2472                                                     DMA_FROM_DEVICE);
2473                         if (dma_mapping_error(dev, page_dmaaddr)) {
2474                                 put_page(pagep);
2475                                 pagep = NULL;
2476                                 adapter->drv_stats.dma_map_errors++;
2477                                 break;
2478                         }
2479                         page_offset = 0;
2480                 } else {
2481                         get_page(pagep);
2482                         page_offset += rx_frag_size;
2483                 }
2484                 page_info->page_offset = page_offset;
2485                 page_info->page = pagep;
2486
2487                 rxd = queue_head_node(rxq);
2488                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2489                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2490                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2491
2492                 /* Any space left in the current big page for another frag? */
2493                 if ((page_offset + rx_frag_size + rx_frag_size) >
2494                                         adapter->big_page_size) {
2495                         pagep = NULL;
2496                         page_info->last_frag = true;
2497                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2498                 } else {
2499                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2500                 }
2501
2502                 prev_page_info = page_info;
2503                 queue_head_inc(rxq);
2504                 page_info = &rxo->page_info_tbl[rxq->head];
2505         }
2506
2507         /* Mark the last frag of a page when we break out of the above loop
2508          * with no more slots available in the RXQ
2509          */
2510         if (pagep) {
2511                 prev_page_info->last_frag = true;
2512                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2513         }
2514
2515         if (posted) {
2516                 atomic_add(posted, &rxq->used);
2517                 if (rxo->rx_post_starved)
2518                         rxo->rx_post_starved = false;
2519                 do {
2520                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2521                         be_rxq_notify(adapter, rxq->id, notify);
2522                         posted -= notify;
2523                 } while (posted);
2524         } else if (atomic_read(&rxq->used) == 0) {
2525                 /* Let be_worker replenish when memory is available */
2526                 rxo->rx_post_starved = true;
2527         }
2528 }
2529
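/* Return the next valid TX completion from the CQ, or NULL if none. The
 * entry is converted to CPU endianness, its status and wrb_index are cached
 * in txo->txcp, and its valid bit is cleared before the CQ tail is advanced.
 */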
2530 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2531 {
2532         struct be_queue_info *tx_cq = &txo->cq;
2533         struct be_tx_compl_info *txcp = &txo->txcp;
2534         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2535
2536         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2537                 return NULL;
2538
2539         /* Ensure load ordering of valid bit dword and other dwords below */
2540         rmb();
2541         be_dws_le_to_cpu(compl, sizeof(*compl));
2542
2543         txcp->status = GET_TX_COMPL_BITS(status, compl);
2544         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2545
2546         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2547         queue_tail_inc(tx_cq);
2548         return txcp;
2549 }
2550
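/* Reclaim the WRBs of one TX request: walk the TXQ from its tail up to
 * last_index, unmapping each data fragment and freeing the completed skb(s).
 * Returns the number of WRBs processed so the caller can decrement
 * txq->used.
 */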
2551 static u16 be_tx_compl_process(struct be_adapter *adapter,
2552                                struct be_tx_obj *txo, u16 last_index)
2553 {
2554         struct sk_buff **sent_skbs = txo->sent_skb_list;
2555         struct be_queue_info *txq = &txo->q;
2556         struct sk_buff *skb = NULL;
2557         bool unmap_skb_hdr = false;
2558         struct be_eth_wrb *wrb;
2559         u16 num_wrbs = 0;
2560         u32 frag_index;
2561
2562         do {
2563                 if (sent_skbs[txq->tail]) {
2564                         /* Free skb from prev req */
2565                         if (skb)
2566                                 dev_consume_skb_any(skb);
2567                         skb = sent_skbs[txq->tail];
2568                         sent_skbs[txq->tail] = NULL;
2569                         queue_tail_inc(txq);  /* skip hdr wrb */
2570                         num_wrbs++;
2571                         unmap_skb_hdr = true;
2572                 }
2573                 wrb = queue_tail_node(txq);
2574                 frag_index = txq->tail;
2575                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2576                               (unmap_skb_hdr && skb_headlen(skb)));
2577                 unmap_skb_hdr = false;
2578                 queue_tail_inc(txq);
2579                 num_wrbs++;
2580         } while (frag_index != last_index);
2581         dev_consume_skb_any(skb);
2582
2583         return num_wrbs;
2584 }
2585
2586 /* Return the number of events in the event queue */
2587 static inline int events_get(struct be_eq_obj *eqo)
2588 {
2589         struct be_eq_entry *eqe;
2590         int num = 0;
2591
2592         do {
2593                 eqe = queue_tail_node(&eqo->q);
2594                 if (eqe->evt == 0)
2595                         break;
2596
2597                 rmb();
2598                 eqe->evt = 0;
2599                 num++;
2600                 queue_tail_inc(&eqo->q);
2601         } while (true);
2602
2603         return num;
2604 }
2605
2606 /* Leaves the EQ in disarmed state */
2607 static void be_eq_clean(struct be_eq_obj *eqo)
2608 {
2609         int num = events_get(eqo);
2610
2611         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2612 }
2613
2614 /* Free posted rx buffers that were not used */
2615 static void be_rxq_clean(struct be_rx_obj *rxo)
2616 {
2617         struct be_queue_info *rxq = &rxo->q;
2618         struct be_rx_page_info *page_info;
2619
2620         while (atomic_read(&rxq->used) > 0) {
2621                 page_info = get_rx_page_info(rxo);
2622                 put_page(page_info->page);
2623                 memset(page_info, 0, sizeof(*page_info));
2624         }
2625         BUG_ON(atomic_read(&rxq->used));
2626         rxq->tail = 0;
2627         rxq->head = 0;
2628 }
2629
2630 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2631 {
2632         struct be_queue_info *rx_cq = &rxo->cq;
2633         struct be_rx_compl_info *rxcp;
2634         struct be_adapter *adapter = rxo->adapter;
2635         int flush_wait = 0;
2636
2637         /* Consume pending rx completions.
2638          * Wait for the flush completion (identified by zero num_rcvd)
2639          * to arrive. Notify CQ even when there are no more CQ entries
2640          * for HW to flush partially coalesced CQ entries.
2641          * In Lancer, there is no need to wait for flush compl.
2642          */
2643         for (;;) {
2644                 rxcp = be_rx_compl_get(rxo);
2645                 if (!rxcp) {
2646                         if (lancer_chip(adapter))
2647                                 break;
2648
2649                         if (flush_wait++ > 50 ||
2650                             be_check_error(adapter,
2651                                            BE_ERROR_HW)) {
2652                                 dev_warn(&adapter->pdev->dev,
2653                                          "did not receive flush compl\n");
2654                                 break;
2655                         }
2656                         be_cq_notify(adapter, rx_cq->id, true, 0);
2657                         mdelay(1);
2658                 } else {
2659                         be_rx_compl_discard(rxo, rxcp);
2660                         be_cq_notify(adapter, rx_cq->id, false, 1);
2661                         if (rxcp->num_rcvd == 0)
2662                                 break;
2663                 }
2664         }
2665
2666         /* After cleanup, leave the CQ in unarmed state */
2667         be_cq_notify(adapter, rx_cq->id, false, 0);
2668 }
2669
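/* Drain all TX completions during teardown: poll each TXQ for completions
 * until the hardware has been quiet for ~10ms (or a HW error is detected),
 * then free any WRBs that were enqueued but never notified to the hardware
 * and reset the queue indices.
 */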
2670 static void be_tx_compl_clean(struct be_adapter *adapter)
2671 {
2672         struct device *dev = &adapter->pdev->dev;
2673         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2674         struct be_tx_compl_info *txcp;
2675         struct be_queue_info *txq;
2676         u32 end_idx, notified_idx;
2677         struct be_tx_obj *txo;
2678         int i, pending_txqs;
2679
2680         /* Stop polling for compls when HW has been silent for 10ms */
2681         do {
2682                 pending_txqs = adapter->num_tx_qs;
2683
2684                 for_all_tx_queues(adapter, txo, i) {
2685                         cmpl = 0;
2686                         num_wrbs = 0;
2687                         txq = &txo->q;
2688                         while ((txcp = be_tx_compl_get(txo))) {
2689                                 num_wrbs +=
2690                                         be_tx_compl_process(adapter, txo,
2691                                                             txcp->end_index);
2692                                 cmpl++;
2693                         }
2694                         if (cmpl) {
2695                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2696                                 atomic_sub(num_wrbs, &txq->used);
2697                                 timeo = 0;
2698                         }
2699                         if (!be_is_tx_compl_pending(txo))
2700                                 pending_txqs--;
2701                 }
2702
2703                 if (pending_txqs == 0 || ++timeo > 10 ||
2704                     be_check_error(adapter, BE_ERROR_HW))
2705                         break;
2706
2707                 mdelay(1);
2708         } while (true);
2709
2710         /* Free enqueued TX that was never notified to HW */
2711         for_all_tx_queues(adapter, txo, i) {
2712                 txq = &txo->q;
2713
2714                 if (atomic_read(&txq->used)) {
2715                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2716                                  i, atomic_read(&txq->used));
2717                         notified_idx = txq->tail;
2718                         end_idx = txq->tail;
2719                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2720                                   txq->len);
2721                         /* Use the tx-compl process logic to handle requests
2722                          * that were not sent to the HW.
2723                          */
2724                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2725                         atomic_sub(num_wrbs, &txq->used);
2726                         BUG_ON(atomic_read(&txq->used));
2727                         txo->pend_wrb_cnt = 0;
2728                         /* Since hw was never notified of these requests,
2729                          * reset TXQ indices
2730                          */
2731                         txq->head = notified_idx;
2732                         txq->tail = notified_idx;
2733                 }
2734         }
2735 }
2736
2737 static void be_evt_queues_destroy(struct be_adapter *adapter)
2738 {
2739         struct be_eq_obj *eqo;
2740         int i;
2741
2742         for_all_evt_queues(adapter, eqo, i) {
2743                 if (eqo->q.created) {
2744                         be_eq_clean(eqo);
2745                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2746                         napi_hash_del(&eqo->napi);
2747                         netif_napi_del(&eqo->napi);
2748                         free_cpumask_var(eqo->affinity_mask);
2749                 }
2750                 be_queue_free(adapter, &eqo->q);
2751         }
2752 }
2753
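/* Create the event queues: size num_evt_qs from the available IRQs and the
 * configured RX/TX IRQ counts, then for each EQ allocate the ring, create it
 * in hardware, set up a per-EQ CPU affinity mask spread across the device's
 * NUMA node, and register its NAPI instance.
 */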
2754 static int be_evt_queues_create(struct be_adapter *adapter)
2755 {
2756         struct be_queue_info *eq;
2757         struct be_eq_obj *eqo;
2758         struct be_aic_obj *aic;
2759         int i, rc;
2760
2761         /* need enough EQs to service both RX and TX queues */
2762         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2763                                     max(adapter->cfg_num_rx_irqs,
2764                                         adapter->cfg_num_tx_irqs));
2765
2766         for_all_evt_queues(adapter, eqo, i) {
2767                 int numa_node = dev_to_node(&adapter->pdev->dev);
2768
2769                 aic = &adapter->aic_obj[i];
2770                 eqo->adapter = adapter;
2771                 eqo->idx = i;
2772                 aic->max_eqd = BE_MAX_EQD;
2773                 aic->enable = true;
2774
2775                 eq = &eqo->q;
2776                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2777                                     sizeof(struct be_eq_entry));
2778                 if (rc)
2779                         return rc;
2780
2781                 rc = be_cmd_eq_create(adapter, eqo);
2782                 if (rc)
2783                         return rc;
2784
2785                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2786                         return -ENOMEM;
2787                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2788                                 eqo->affinity_mask);
2789                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2790                                BE_NAPI_WEIGHT);
2791         }
2792         return 0;
2793 }
2794
2795 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2796 {
2797         struct be_queue_info *q;
2798
2799         q = &adapter->mcc_obj.q;
2800         if (q->created)
2801                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2802         be_queue_free(adapter, q);
2803
2804         q = &adapter->mcc_obj.cq;
2805         if (q->created)
2806                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2807         be_queue_free(adapter, q);
2808 }
2809
2810 /* Must be called only after TX qs are created as MCC shares TX EQ */
2811 static int be_mcc_queues_create(struct be_adapter *adapter)
2812 {
2813         struct be_queue_info *q, *cq;
2814
2815         cq = &adapter->mcc_obj.cq;
2816         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2817                            sizeof(struct be_mcc_compl)))
2818                 goto err;
2819
2820         /* Use the default EQ for MCC completions */
2821         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2822                 goto mcc_cq_free;
2823
2824         q = &adapter->mcc_obj.q;
2825         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2826                 goto mcc_cq_destroy;
2827
2828         if (be_cmd_mccq_create(adapter, q, cq))
2829                 goto mcc_q_free;
2830
2831         return 0;
2832
2833 mcc_q_free:
2834         be_queue_free(adapter, q);
2835 mcc_cq_destroy:
2836         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2837 mcc_cq_free:
2838         be_queue_free(adapter, cq);
2839 err:
2840         return -1;
2841 }
2842
2843 static void be_tx_queues_destroy(struct be_adapter *adapter)
2844 {
2845         struct be_queue_info *q;
2846         struct be_tx_obj *txo;
2847         u8 i;
2848
2849         for_all_tx_queues(adapter, txo, i) {
2850                 q = &txo->q;
2851                 if (q->created)
2852                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2853                 be_queue_free(adapter, q);
2854
2855                 q = &txo->cq;
2856                 if (q->created)
2857                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2858                 be_queue_free(adapter, q);
2859         }
2860 }
2861
2862 static int be_tx_qs_create(struct be_adapter *adapter)
2863 {
2864         struct be_queue_info *cq;
2865         struct be_tx_obj *txo;
2866         struct be_eq_obj *eqo;
2867         int status, i;
2868
2869         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2870
2871         for_all_tx_queues(adapter, txo, i) {
2872                 cq = &txo->cq;
2873                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2874                                         sizeof(struct be_eth_tx_compl));
2875                 if (status)
2876                         return status;
2877
2878                 u64_stats_init(&txo->stats.sync);
2879                 u64_stats_init(&txo->stats.sync_compl);
2880
2881                 /* If num_evt_qs is less than num_tx_qs, then more than
2882                  * one txq shares an eq
2883                  */
2884                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2885                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2886                 if (status)
2887                         return status;
2888
2889                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2890                                         sizeof(struct be_eth_wrb));
2891                 if (status)
2892                         return status;
2893
2894                 status = be_cmd_txq_create(adapter, txo);
2895                 if (status)
2896                         return status;
2897
2898                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2899                                     eqo->idx);
2900         }
2901
2902         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2903                  adapter->num_tx_qs);
2904         return 0;
2905 }
2906
2907 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2908 {
2909         struct be_queue_info *q;
2910         struct be_rx_obj *rxo;
2911         int i;
2912
2913         for_all_rx_queues(adapter, rxo, i) {
2914                 q = &rxo->cq;
2915                 if (q->created)
2916                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2917                 be_queue_free(adapter, q);
2918         }
2919 }
2920
2921 static int be_rx_cqs_create(struct be_adapter *adapter)
2922 {
2923         struct be_queue_info *eq, *cq;
2924         struct be_rx_obj *rxo;
2925         int rc, i;
2926
2927         adapter->num_rss_qs =
2928                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2929
2930         /* We'll use RSS only if at least 2 RSS rings are supported. */
2931         if (adapter->num_rss_qs < 2)
2932                 adapter->num_rss_qs = 0;
2933
2934         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2935
2936         /* When the interface is not capable of RSS rings (and there is no
2937          * need to create a default RXQ) we'll still need one RXQ
2938          */
2939         if (adapter->num_rx_qs == 0)
2940                 adapter->num_rx_qs = 1;
2941
2942         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2943         for_all_rx_queues(adapter, rxo, i) {
2944                 rxo->adapter = adapter;
2945                 cq = &rxo->cq;
2946                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2947                                     sizeof(struct be_eth_rx_compl));
2948                 if (rc)
2949                         return rc;
2950
2951                 u64_stats_init(&rxo->stats.sync);
2952                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
2953                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
2954                 if (rc)
2955                         return rc;
2956         }
2957
2958         dev_info(&adapter->pdev->dev,
2959                  "created %d RX queue(s)\n", adapter->num_rx_qs);
2960         return 0;
2961 }
2962
2963 static irqreturn_t be_intx(int irq, void *dev)
2964 {
2965         struct be_eq_obj *eqo = dev;
2966         struct be_adapter *adapter = eqo->adapter;
2967         int num_evts = 0;
2968
2969         /* IRQ is not expected when NAPI is scheduled as the EQ
2970          * will not be armed.
2971          * But, this can happen on Lancer INTx where it takes
2972          * a while to de-assert INTx or in BE2 where occasionally
2973          * an interrupt may be raised even when EQ is unarmed.
2974          * If NAPI is already scheduled, then counting & notifying
2975          * events will orphan them.
2976          */
2977         if (napi_schedule_prep(&eqo->napi)) {
2978                 num_evts = events_get(eqo);
2979                 __napi_schedule(&eqo->napi);
2980                 if (num_evts)
2981                         eqo->spurious_intr = 0;
2982         }
2983         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
2984
2985         /* Return IRQ_HANDLED only for the first spurious intr
2986          * after a valid intr to stop the kernel from branding
2987          * this irq as a bad one!
2988          */
2989         if (num_evts || eqo->spurious_intr++ == 0)
2990                 return IRQ_HANDLED;
2991         else
2992                 return IRQ_NONE;
2993 }
2994
2995 static irqreturn_t be_msix(int irq, void *dev)
2996 {
2997         struct be_eq_obj *eqo = dev;
2998
2999         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3000         napi_schedule(&eqo->napi);
3001         return IRQ_HANDLED;
3002 }
3003
3004 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3005 {
3006         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3007 }
3008
3009 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3010                          int budget, int polling)
3011 {
3012         struct be_adapter *adapter = rxo->adapter;
3013         struct be_queue_info *rx_cq = &rxo->cq;
3014         struct be_rx_compl_info *rxcp;
3015         u32 work_done;
3016         u32 frags_consumed = 0;
3017
3018         for (work_done = 0; work_done < budget; work_done++) {
3019                 rxcp = be_rx_compl_get(rxo);
3020                 if (!rxcp)
3021                         break;
3022
3023                 /* Is it a flush compl that has no data */
3024                 if (unlikely(rxcp->num_rcvd == 0))
3025                         goto loop_continue;
3026
3027                 /* Discard compl with partial DMA Lancer B0 */
3028                 if (unlikely(!rxcp->pkt_size)) {
3029                         be_rx_compl_discard(rxo, rxcp);
3030                         goto loop_continue;
3031                 }
3032
3033                 /* On BE drop pkts that arrive due to imperfect filtering in
3034                  * promiscuous mode on some SKUs
3035                  */
3036                 if (unlikely(rxcp->port != adapter->port_num &&
3037                              !lancer_chip(adapter))) {
3038                         be_rx_compl_discard(rxo, rxcp);
3039                         goto loop_continue;
3040                 }
3041
3042                 /* Don't do gro when we're busy_polling */
3043                 if (do_gro(rxcp) && polling != BUSY_POLLING)
3044                         be_rx_compl_process_gro(rxo, napi, rxcp);
3045                 else
3046                         be_rx_compl_process(rxo, napi, rxcp);
3047
3048 loop_continue:
3049                 frags_consumed += rxcp->num_rcvd;
3050                 be_rx_stats_update(rxo, rxcp);
3051         }
3052
3053         if (work_done) {
3054                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3055
3056                 /* When an rx-obj gets into post_starved state, just
3057                  * let be_worker do the posting.
3058                  */
3059                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3060                     !rxo->rx_post_starved)
3061                         be_post_rx_frags(rxo, GFP_ATOMIC,
3062                                          max_t(u32, MAX_RX_POST,
3063                                                frags_consumed));
3064         }
3065
3066         return work_done;
3067 }
3068
3069 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3070 {
3071         switch (status) {
3072         case BE_TX_COMP_HDR_PARSE_ERR:
3073                 tx_stats(txo)->tx_hdr_parse_err++;
3074                 break;
3075         case BE_TX_COMP_NDMA_ERR:
3076                 tx_stats(txo)->tx_dma_err++;
3077                 break;
3078         case BE_TX_COMP_ACL_ERR:
3079                 tx_stats(txo)->tx_spoof_check_err++;
3080                 break;
3081         }
3082 }
3083
3084 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3085 {
3086         switch (status) {
3087         case LANCER_TX_COMP_LSO_ERR:
3088                 tx_stats(txo)->tx_tso_err++;
3089                 break;
3090         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3091         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3092                 tx_stats(txo)->tx_spoof_check_err++;
3093                 break;
3094         case LANCER_TX_COMP_QINQ_ERR:
3095                 tx_stats(txo)->tx_qinq_err++;
3096                 break;
3097         case LANCER_TX_COMP_PARITY_ERR:
3098                 tx_stats(txo)->tx_internal_parity_err++;
3099                 break;
3100         case LANCER_TX_COMP_DMA_ERR:
3101                 tx_stats(txo)->tx_dma_err++;
3102                 break;
3103         }
3104 }
3105
3106 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3107                           int idx)
3108 {
3109         int num_wrbs = 0, work_done = 0;
3110         struct be_tx_compl_info *txcp;
3111
3112         while ((txcp = be_tx_compl_get(txo))) {
3113                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3114                 work_done++;
3115
3116                 if (txcp->status) {
3117                         if (lancer_chip(adapter))
3118                                 lancer_update_tx_err(txo, txcp->status);
3119                         else
3120                                 be_update_tx_err(txo, txcp->status);
3121                 }
3122         }
3123
3124         if (work_done) {
3125                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3126                 atomic_sub(num_wrbs, &txo->q.used);
3127
3128                 /* As Tx wrbs have been freed up, wake up netdev queue
3129                  * if it was stopped due to lack of tx wrbs.  */
3130                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3131                     be_can_txq_wake(txo)) {
3132                         netif_wake_subqueue(adapter->netdev, idx);
3133                 }
3134
3135                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3136                 tx_stats(txo)->tx_compl += work_done;
3137                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3138         }
3139 }
3140
3141 #ifdef CONFIG_NET_RX_BUSY_POLL
3142 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3143 {
3144         bool status = true;
3145
3146         spin_lock(&eqo->lock); /* BH is already disabled */
3147         if (eqo->state & BE_EQ_LOCKED) {
3148                 WARN_ON(eqo->state & BE_EQ_NAPI);
3149                 eqo->state |= BE_EQ_NAPI_YIELD;
3150                 status = false;
3151         } else {
3152                 eqo->state = BE_EQ_NAPI;
3153         }
3154         spin_unlock(&eqo->lock);
3155         return status;
3156 }
3157
3158 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3159 {
3160         spin_lock(&eqo->lock); /* BH is already disabled */
3161
3162         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3163         eqo->state = BE_EQ_IDLE;
3164
3165         spin_unlock(&eqo->lock);
3166 }
3167
3168 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3169 {
3170         bool status = true;
3171
3172         spin_lock_bh(&eqo->lock);
3173         if (eqo->state & BE_EQ_LOCKED) {
3174                 eqo->state |= BE_EQ_POLL_YIELD;
3175                 status = false;
3176         } else {
3177                 eqo->state |= BE_EQ_POLL;
3178         }
3179         spin_unlock_bh(&eqo->lock);
3180         return status;
3181 }
3182
3183 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3184 {
3185         spin_lock_bh(&eqo->lock);
3186
3187         WARN_ON(eqo->state & (BE_EQ_NAPI));
3188         eqo->state = BE_EQ_IDLE;
3189
3190         spin_unlock_bh(&eqo->lock);
3191 }
3192
3193 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3194 {
3195         spin_lock_init(&eqo->lock);
3196         eqo->state = BE_EQ_IDLE;
3197 }
3198
3199 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3200 {
3201         local_bh_disable();
3202
3203         /* It's enough to just acquire napi lock on the eqo to stop
3204          * be_busy_poll() from processing any queues.
3205          */
3206         while (!be_lock_napi(eqo))
3207                 mdelay(1);
3208
3209         local_bh_enable();
3210 }
3211
3212 #else /* CONFIG_NET_RX_BUSY_POLL */
3213
3214 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3215 {
3216         return true;
3217 }
3218
3219 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3220 {
3221 }
3222
3223 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3224 {
3225         return false;
3226 }
3227
3228 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3229 {
3230 }
3231
3232 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3233 {
3234 }
3235
3236 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3237 {
3238 }
3239 #endif /* CONFIG_NET_RX_BUSY_POLL */
3240
3241 int be_poll(struct napi_struct *napi, int budget)
3242 {
3243         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3244         struct be_adapter *adapter = eqo->adapter;
3245         int max_work = 0, work, i, num_evts;
3246         struct be_rx_obj *rxo;
3247         struct be_tx_obj *txo;
3248         u32 mult_enc = 0;
3249
3250         num_evts = events_get(eqo);
3251
3252         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3253                 be_process_tx(adapter, txo, i);
3254
3255         if (be_lock_napi(eqo)) {
3256                 /* This loop will iterate twice for EQ0 in which
3257                  * completions of the last RXQ (default one) are also processed
3258                  * For other EQs the loop iterates only once
3259                  */
3260                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3261                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3262                         max_work = max(work, max_work);
3263                 }
3264                 be_unlock_napi(eqo);
3265         } else {
3266                 max_work = budget;
3267         }
3268
3269         if (is_mcc_eqo(eqo))
3270                 be_process_mcc(adapter);
3271
3272         if (max_work < budget) {
3273                 napi_complete(napi);
3274
3275                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3276                  * delay via a delay multiplier encoding value
3277                  */
3278                 if (skyhawk_chip(adapter))
3279                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3280
3281                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3282                              mult_enc);
3283         } else {
3284                 /* As we'll continue in polling mode, count and clear events */
3285                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3286         }
3287         return max_work;
3288 }
3289
3290 #ifdef CONFIG_NET_RX_BUSY_POLL
3291 static int be_busy_poll(struct napi_struct *napi)
3292 {
3293         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3294         struct be_adapter *adapter = eqo->adapter;
3295         struct be_rx_obj *rxo;
3296         int i, work = 0;
3297
3298         if (!be_lock_busy_poll(eqo))
3299                 return LL_FLUSH_BUSY;
3300
3301         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3302                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3303                 if (work)
3304                         break;
3305         }
3306
3307         be_unlock_busy_poll(eqo);
3308         return work;
3309 }
3310 #endif
3311
3312 void be_detect_error(struct be_adapter *adapter)
3313 {
3314         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3315         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3316         u32 i;
3317         struct device *dev = &adapter->pdev->dev;
3318
3319         if (be_check_error(adapter, BE_ERROR_HW))
3320                 return;
3321
3322         if (lancer_chip(adapter)) {
3323                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3324                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3325                         be_set_error(adapter, BE_ERROR_UE);
3326                         sliport_err1 = ioread32(adapter->db +
3327                                                 SLIPORT_ERROR1_OFFSET);
3328                         sliport_err2 = ioread32(adapter->db +
3329                                                 SLIPORT_ERROR2_OFFSET);
3330                         /* Do not log error messages if it's a FW reset */
3331                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3332                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3333                                 dev_info(dev, "Firmware update in progress\n");
3334                         } else {
3335                                 dev_err(dev, "Error detected in the card\n");
3336                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3337                                         sliport_status);
3338                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3339                                         sliport_err1);
3340                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3341                                         sliport_err2);
3342                         }
3343                 }
3344         } else {
3345                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3346                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3347                 ue_lo_mask = ioread32(adapter->pcicfg +
3348                                       PCICFG_UE_STATUS_LOW_MASK);
3349                 ue_hi_mask = ioread32(adapter->pcicfg +
3350                                       PCICFG_UE_STATUS_HI_MASK);
3351
3352                 ue_lo = (ue_lo & ~ue_lo_mask);
3353                 ue_hi = (ue_hi & ~ue_hi_mask);
3354
3355                 /* On certain platforms BE hardware can indicate spurious UEs.
3356                  * In case of a real UE the HW will stop working anyway; hence we
3357                  * don't set the hw_error state merely on UE detection.
3358                  */
3359
3360                 if (ue_lo || ue_hi) {
3361                         dev_err(dev,
3362                                 "Unrecoverable Error detected in the adapter");
3363                         dev_err(dev, "Please reboot server to recover");
3364                         if (skyhawk_chip(adapter))
3365                                 be_set_error(adapter, BE_ERROR_UE);
3366
3367                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3368                                 if (ue_lo & 1)
3369                                         dev_err(dev, "UE: %s bit set\n",
3370                                                 ue_status_low_desc[i]);
3371                         }
3372                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3373                                 if (ue_hi & 1)
3374                                         dev_err(dev, "UE: %s bit set\n",
3375                                                 ue_status_hi_desc[i]);
3376                         }
3377                 }
3378         }
3379 }
3380
3381 static void be_msix_disable(struct be_adapter *adapter)
3382 {
3383         if (msix_enabled(adapter)) {
3384                 pci_disable_msix(adapter->pdev);
3385                 adapter->num_msix_vec = 0;
3386                 adapter->num_msix_roce_vec = 0;
3387         }
3388 }
3389
3390 static int be_msix_enable(struct be_adapter *adapter)
3391 {
3392         unsigned int i, max_roce_eqs;
3393         struct device *dev = &adapter->pdev->dev;
3394         int num_vec;
3395
3396         /* If RoCE is supported, program the max number of vectors that
3397          * could be used for NIC and RoCE; otherwise, just program the number
3398          * we'll use initially.
3399          */
3400         if (be_roce_supported(adapter)) {
3401                 max_roce_eqs =
3402                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3403                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3404                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3405         } else {
3406                 num_vec = max(adapter->cfg_num_rx_irqs,
3407                               adapter->cfg_num_tx_irqs);
3408         }
3409
3410         for (i = 0; i < num_vec; i++)
3411                 adapter->msix_entries[i].entry = i;
3412
3413         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3414                                         MIN_MSIX_VECTORS, num_vec);
3415         if (num_vec < 0)
3416                 goto fail;
3417
3418         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3419                 adapter->num_msix_roce_vec = num_vec / 2;
3420                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3421                          adapter->num_msix_roce_vec);
3422         }
3423
3424         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
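        /* Illustrative split (hypothetical vector count): if 16 MSI-X vectors
         * were granted with RoCE supported, 8 would be reserved for RoCE above
         * and the remaining 8 used for the NIC event queues.
         */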
3425
3426         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3427                  adapter->num_msix_vec);
3428         return 0;
3429
3430 fail:
3431         dev_warn(dev, "MSIx enable failed\n");
3432
3433         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3434         if (be_virtfn(adapter))
3435                 return num_vec;
3436         return 0;
3437 }
3438
3439 static inline int be_msix_vec_get(struct be_adapter *adapter,
3440                                   struct be_eq_obj *eqo)
3441 {
3442         return adapter->msix_entries[eqo->msix_idx].vector;
3443 }
3444
3445 static int be_msix_register(struct be_adapter *adapter)
3446 {
3447         struct net_device *netdev = adapter->netdev;
3448         struct be_eq_obj *eqo;
3449         int status, i, vec;
3450
3451         for_all_evt_queues(adapter, eqo, i) {
3452                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3453                 vec = be_msix_vec_get(adapter, eqo);
3454                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3455                 if (status)
3456                         goto err_msix;
3457
3458                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3459         }
3460
3461         return 0;
3462 err_msix:
3463         for (i--; i >= 0; i--) {
3464                 eqo = &adapter->eq_obj[i];
3465                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3466         }
3467         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3468                  status);
3469         be_msix_disable(adapter);
3470         return status;
3471 }
3472
3473 static int be_irq_register(struct be_adapter *adapter)
3474 {
3475         struct net_device *netdev = adapter->netdev;
3476         int status;
3477
3478         if (msix_enabled(adapter)) {
3479                 status = be_msix_register(adapter);
3480                 if (status == 0)
3481                         goto done;
3482                 /* INTx is not supported for VF */
3483                 if (be_virtfn(adapter))
3484                         return status;
3485         }
3486
3487         /* INTx: only the first EQ is used */
3488         netdev->irq = adapter->pdev->irq;
3489         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3490                              &adapter->eq_obj[0]);
3491         if (status) {
3492                 dev_err(&adapter->pdev->dev,
3493                         "INTx request IRQ failed - err %d\n", status);
3494                 return status;
3495         }
3496 done:
3497         adapter->isr_registered = true;
3498         return 0;
3499 }
3500
3501 static void be_irq_unregister(struct be_adapter *adapter)
3502 {
3503         struct net_device *netdev = adapter->netdev;
3504         struct be_eq_obj *eqo;
3505         int i, vec;
3506
3507         if (!adapter->isr_registered)
3508                 return;
3509
3510         /* INTx */
3511         if (!msix_enabled(adapter)) {
3512                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3513                 goto done;
3514         }
3515
3516         /* MSIx */
3517         for_all_evt_queues(adapter, eqo, i) {
3518                 vec = be_msix_vec_get(adapter, eqo);
3519                 irq_set_affinity_hint(vec, NULL);
3520                 free_irq(vec, eqo);
3521         }
3522
3523 done:
3524         adapter->isr_registered = false;
3525 }
3526
3527 static void be_rx_qs_destroy(struct be_adapter *adapter)
3528 {
3529         struct rss_info *rss = &adapter->rss_info;
3530         struct be_queue_info *q;
3531         struct be_rx_obj *rxo;
3532         int i;
3533
3534         for_all_rx_queues(adapter, rxo, i) {
3535                 q = &rxo->q;
3536                 if (q->created) {
3537                         /* If RXQs are destroyed while in an "out of buffer"
3538                          * state, there is a possibility of an HW stall on
3539                          * Lancer. So, post 64 buffers to each queue to relieve
3540                          * the "out of buffer" condition.
3541                          * Make sure there's space in the RXQ before posting.
3542                          */
3543                         if (lancer_chip(adapter)) {
3544                                 be_rx_cq_clean(rxo);
3545                                 if (atomic_read(&q->used) == 0)
3546                                         be_post_rx_frags(rxo, GFP_KERNEL,
3547                                                          MAX_RX_POST);
3548                         }
3549
3550                         be_cmd_rxq_destroy(adapter, q);
3551                         be_rx_cq_clean(rxo);
3552                         be_rxq_clean(rxo);
3553                 }
3554                 be_queue_free(adapter, q);
3555         }
3556
3557         if (rss->rss_flags) {
3558                 rss->rss_flags = RSS_ENABLE_NONE;
3559                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3560                                   128, rss->rss_hkey);
3561         }
3562 }
3563
3564 static void be_disable_if_filters(struct be_adapter *adapter)
3565 {
3566         be_cmd_pmac_del(adapter, adapter->if_handle,
3567                         adapter->pmac_id[0], 0);
3568
3569         be_clear_uc_list(adapter);
3570         be_clear_mc_list(adapter);
3571
3572         /* The IFACE flags are enabled in the open path and cleared
3573          * in the close path. When a VF gets detached from the host and
3574          * assigned to a VM the following happens:
3575          *      - VF's IFACE flags get cleared in the detach path
3576          *      - IFACE create is issued by the VF in the attach path
3577          * Due to a bug in the BE3/Skyhawk-R FW
3578          * (Lancer FW doesn't have the bug), the IFACE capability flags
3579          * specified along with the IFACE create cmd issued by a VF are not
3580          * honoured by FW.  As a consequence, if a *new* driver
3581          * (that enables/disables IFACE flags in open/close)
3582          * is loaded in the host and an *old* driver is used by a VM/VF,
3583          * the IFACE gets created *without* the needed flags.
3584          * To avoid this, disable RX-filter flags only for Lancer.
3585          */
3586         if (lancer_chip(adapter)) {
3587                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3588                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3589         }
3590 }
3591
3592 static int be_close(struct net_device *netdev)
3593 {
3594         struct be_adapter *adapter = netdev_priv(netdev);
3595         struct be_eq_obj *eqo;
3596         int i;
3597
3598         /* This protection is needed as be_close() may be called even when the
3599          * adapter is in cleared state (after eeh perm failure)
3600          */
3601         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3602                 return 0;
3603
3604         /* Before attempting cleanup ensure all the pending cmds in the
3605          * config_wq have finished execution
3606          */
3607         flush_workqueue(be_wq);
3608
3609         be_disable_if_filters(adapter);
3610
3611         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3612                 for_all_evt_queues(adapter, eqo, i) {
3613                         napi_disable(&eqo->napi);
3614                         be_disable_busy_poll(eqo);
3615                 }
3616                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3617         }
3618
3619         be_async_mcc_disable(adapter);
3620
3621         /* Wait for all pending tx completions to arrive so that
3622          * all tx skbs are freed.
3623          */
3624         netif_tx_disable(netdev);
3625         be_tx_compl_clean(adapter);
3626
3627         be_rx_qs_destroy(adapter);
3628
3629         for_all_evt_queues(adapter, eqo, i) {
3630                 if (msix_enabled(adapter))
3631                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3632                 else
3633                         synchronize_irq(netdev->irq);
3634                 be_eq_clean(eqo);
3635         }
3636
3637         be_irq_unregister(adapter);
3638
3639         return 0;
3640 }
3641
3642 static int be_rx_qs_create(struct be_adapter *adapter)
3643 {
3644         struct rss_info *rss = &adapter->rss_info;
3645         u8 rss_key[RSS_HASH_KEY_LEN];
3646         struct be_rx_obj *rxo;
3647         int rc, i, j;
3648
3649         for_all_rx_queues(adapter, rxo, i) {
3650                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3651                                     sizeof(struct be_eth_rx_d));
3652                 if (rc)
3653                         return rc;
3654         }
3655
3656         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3657                 rxo = default_rxo(adapter);
3658                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3659                                        rx_frag_size, adapter->if_handle,
3660                                        false, &rxo->rss_id);
3661                 if (rc)
3662                         return rc;
3663         }
3664
3665         for_all_rss_queues(adapter, rxo, i) {
3666                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3667                                        rx_frag_size, adapter->if_handle,
3668                                        true, &rxo->rss_id);
3669                 if (rc)
3670                         return rc;
3671         }
3672
3673         if (be_multi_rxq(adapter)) {
3674                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3675                         for_all_rss_queues(adapter, rxo, i) {
3676                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3677                                         break;
3678                                 rss->rsstable[j + i] = rxo->rss_id;
3679                                 rss->rss_queue[j + i] = i;
3680                         }
3681                 }
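                /* Illustrative layout (hypothetical queue count): with 4 RSS
                 * queues, the loop above fills the RSS_INDIR_TABLE_LEN-entry
                 * indirection table with the repeating pattern q0, q1, q2, q3.
                 */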
3682                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3683                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3684
3685                 if (!BEx_chip(adapter))
3686                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3687                                 RSS_ENABLE_UDP_IPV6;
3688
3689                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3690                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3691                                        RSS_INDIR_TABLE_LEN, rss_key);
3692                 if (rc) {
3693                         rss->rss_flags = RSS_ENABLE_NONE;
3694                         return rc;
3695                 }
3696
3697                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3698         } else {
3699                 /* Disable RSS, if only default RX Q is created */
3700                 rss->rss_flags = RSS_ENABLE_NONE;
3701         }
3702
3703
3704         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3705          * which is a queue empty condition
3706          */
3707         for_all_rx_queues(adapter, rxo, i)
3708                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3709
3710         return 0;
3711 }
3712
3713 static int be_enable_if_filters(struct be_adapter *adapter)
3714 {
3715         int status;
3716
3717         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3718         if (status)
3719                 return status;
3720
3721         /* For BE3 VFs, the PF programs the initial MAC address */
3722         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3723                 status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
3724                                          adapter->if_handle,
3725                                          &adapter->pmac_id[0], 0);
3726                 if (status)
3727                         return status;
3728         }
3729
3730         if (adapter->vlans_added)
3731                 be_vid_config(adapter);
3732
3733         __be_set_rx_mode(adapter);
3734
3735         return 0;
3736 }
3737
3738 static int be_open(struct net_device *netdev)
3739 {
3740         struct be_adapter *adapter = netdev_priv(netdev);
3741         struct be_eq_obj *eqo;
3742         struct be_rx_obj *rxo;
3743         struct be_tx_obj *txo;
3744         u8 link_status;
3745         int status, i;
3746
3747         status = be_rx_qs_create(adapter);
3748         if (status)
3749                 goto err;
3750
3751         status = be_enable_if_filters(adapter);
3752         if (status)
3753                 goto err;
3754
3755         status = be_irq_register(adapter);
3756         if (status)
3757                 goto err;
3758
3759         for_all_rx_queues(adapter, rxo, i)
3760                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3761
3762         for_all_tx_queues(adapter, txo, i)
3763                 be_cq_notify(adapter, txo->cq.id, true, 0);
3764
3765         be_async_mcc_enable(adapter);
3766
3767         for_all_evt_queues(adapter, eqo, i) {
3768                 napi_enable(&eqo->napi);
3769                 be_enable_busy_poll(eqo);
3770                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3771         }
3772         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3773
3774         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3775         if (!status)
3776                 be_link_status_update(adapter, link_status);
3777
3778         netif_tx_start_all_queues(netdev);
3779         if (skyhawk_chip(adapter))
3780                 udp_tunnel_get_rx_info(netdev);
3781
3782         return 0;
3783 err:
3784         be_close(adapter->netdev);
3785         return -EIO;
3786 }
3787
3788 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3789 {
3790         u32 addr;
3791
3792         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3793
3794         mac[5] = (u8)(addr & 0xFF);
3795         mac[4] = (u8)((addr >> 8) & 0xFF);
3796         mac[3] = (u8)((addr >> 16) & 0xFF);
3797         /* Use the OUI from the current MAC address */
3798         memcpy(mac, adapter->netdev->dev_addr, 3);
3799 }
3800
3801 /*
3802  * Generate a seed MAC address from the PF MAC Address using jhash.
3803  * MAC addresses for VFs are assigned incrementally starting from the seed.
3804  * These addresses are programmed in the ASIC by the PF and the VF driver
3805  * queries for the MAC address during its probe.
3806  */
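/* Illustrative example (hypothetical seed): if the generated seed MAC is
 * 00:11:22:aa:bb:c0, then VF0 gets ...:c0, VF1 gets ...:c1, VF2 gets ...:c2,
 * and so on, since only mac[5] is incremented per VF below.
 */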
3807 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3808 {
3809         u32 vf;
3810         int status = 0;
3811         u8 mac[ETH_ALEN];
3812         struct be_vf_cfg *vf_cfg;
3813
3814         be_vf_eth_addr_generate(adapter, mac);
3815
3816         for_all_vfs(adapter, vf_cfg, vf) {
3817                 if (BEx_chip(adapter))
3818                         status = be_cmd_pmac_add(adapter, mac,
3819                                                  vf_cfg->if_handle,
3820                                                  &vf_cfg->pmac_id, vf + 1);
3821                 else
3822                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3823                                                 vf + 1);
3824
3825                 if (status)
3826                         dev_err(&adapter->pdev->dev,
3827                                 "Mac address assignment failed for VF %d\n",
3828                                 vf);
3829                 else
3830                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3831
3832                 mac[5] += 1;
3833         }
3834         return status;
3835 }
3836
3837 static int be_vfs_mac_query(struct be_adapter *adapter)
3838 {
3839         int status, vf;
3840         u8 mac[ETH_ALEN];
3841         struct be_vf_cfg *vf_cfg;
3842
3843         for_all_vfs(adapter, vf_cfg, vf) {
3844                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3845                                                mac, vf_cfg->if_handle,
3846                                                false, vf+1);
3847                 if (status)
3848                         return status;
3849                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3850         }
3851         return 0;
3852 }
3853
3854 static void be_vf_clear(struct be_adapter *adapter)
3855 {
3856         struct be_vf_cfg *vf_cfg;
3857         u32 vf;
3858
3859         if (pci_vfs_assigned(adapter->pdev)) {
3860                 dev_warn(&adapter->pdev->dev,
3861                          "VFs are assigned to VMs: not disabling VFs\n");
3862                 goto done;
3863         }
3864
3865         pci_disable_sriov(adapter->pdev);
3866
3867         for_all_vfs(adapter, vf_cfg, vf) {
3868                 if (BEx_chip(adapter))
3869                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3870                                         vf_cfg->pmac_id, vf + 1);
3871                 else
3872                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3873                                        vf + 1);
3874
3875                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3876         }
3877
3878         if (BE3_chip(adapter))
3879                 be_cmd_set_hsw_config(adapter, 0, 0,
3880                                       adapter->if_handle,
3881                                       PORT_FWD_TYPE_PASSTHRU, 0);
3882 done:
3883         kfree(adapter->vf_cfg);
3884         adapter->num_vfs = 0;
3885         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3886 }
3887
3888 static void be_clear_queues(struct be_adapter *adapter)
3889 {
3890         be_mcc_queues_destroy(adapter);
3891         be_rx_cqs_destroy(adapter);
3892         be_tx_queues_destroy(adapter);
3893         be_evt_queues_destroy(adapter);
3894 }
3895
3896 static void be_cancel_worker(struct be_adapter *adapter)
3897 {
3898         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3899                 cancel_delayed_work_sync(&adapter->work);
3900                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3901         }
3902 }
3903
3904 static void be_cancel_err_detection(struct be_adapter *adapter)
3905 {
3906         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3907                 cancel_delayed_work_sync(&adapter->be_err_detection_work);
3908                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3909         }
3910 }
3911
3912 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3913 {
3914         struct net_device *netdev = adapter->netdev;
3915
3916         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3917                 be_cmd_manage_iface(adapter, adapter->if_handle,
3918                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3919
3920         if (adapter->vxlan_port)
3921                 be_cmd_set_vxlan_port(adapter, 0);
3922
3923         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3924         adapter->vxlan_port = 0;
3925
3926         netdev->hw_enc_features = 0;
3927         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3928         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3929 }
3930
3931 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3932                                 struct be_resources *vft_res)
3933 {
3934         struct be_resources res = adapter->pool_res;
3935         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3936         struct be_resources res_mod = {0};
3937         u16 num_vf_qs = 1;
3938
3939         /* Distribute the queue resources among the PF and its VFs */
3940         if (num_vfs) {
3941                 /* Divide the rx queues evenly among the VFs and the PF, capped
3942                  * at VF-EQ-count. Any remainder queues belong to the PF.
3943                  */
3944                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3945                                 res.max_rss_qs / (num_vfs + 1));
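                /* Illustrative example (hypothetical values): with
                 * res.max_rss_qs = 30 and num_vfs = 14, each of the 15
                 * functions gets min(SH_VF_MAX_NIC_EQS, 30 / 15) = 2 queues,
                 * assuming SH_VF_MAX_NIC_EQS is at least 2.
                 */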
3946
3947                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3948                  * RSS Tables per port. Provide RSS on VFs only if the number of
3949                  * VFs requested is less than its PF Pool's RSS Tables limit.
3950                  */
3951                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3952                         num_vf_qs = 1;
3953         }
3954
3955         /* Resource fields that GET_PROFILE_CONFIG reports as all '1's are the
3956          * ones that are modifiable using the SET_PROFILE_CONFIG cmd.
3957          */
3958         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3959                                   RESOURCE_MODIFIABLE, 0);
3960
3961         /* If RSS IFACE capability flags are modifiable for a VF, set the
3962          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3963          * more than 1 RSSQ is available for a VF.
3964          * Otherwise, provision only 1 queue pair for VF.
3965          */
3966         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3967                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3968                 if (num_vf_qs > 1) {
3969                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3970                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3971                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3972                 } else {
3973                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3974                                              BE_IF_FLAGS_DEFQ_RSS);
3975                 }
3976         } else {
3977                 num_vf_qs = 1;
3978         }
3979
3980         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3981                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3982                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3983         }
3984
3985         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3986         vft_res->max_rx_qs = num_vf_qs;
3987         vft_res->max_rss_qs = num_vf_qs;
3988         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3989         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3990
3991         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3992          * among the PF and its VFs, if the fields are changeable
3993          */
3994         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3995                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3996
3997         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3998                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3999
4000         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4001                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4002
4003         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4004                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4005 }
4006
4007 static void be_if_destroy(struct be_adapter *adapter)
4008 {
4009         be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4010
4011         kfree(adapter->pmac_id);
4012         adapter->pmac_id = NULL;
4013
4014         kfree(adapter->mc_list);
4015         adapter->mc_list = NULL;
4016
4017         kfree(adapter->uc_list);
4018         adapter->uc_list = NULL;
4019 }
4020
4021 static int be_clear(struct be_adapter *adapter)
4022 {
4023         struct pci_dev *pdev = adapter->pdev;
4024         struct be_resources vft_res = {0};
4025
4026         be_cancel_worker(adapter);
4027
4028         flush_workqueue(be_wq);
4029
4030         if (sriov_enabled(adapter))
4031                 be_vf_clear(adapter);
4032
4033         /* Re-configure FW to distribute resources evenly across max-supported
4034          * number of VFs, only when VFs are not already enabled.
4035          */
4036         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4037             !pci_vfs_assigned(pdev)) {
4038                 be_calculate_vf_res(adapter,
4039                                     pci_sriov_get_totalvfs(pdev),
4040                                     &vft_res);
4041                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4042                                         pci_sriov_get_totalvfs(pdev),
4043                                         &vft_res);
4044         }
4045
4046         be_disable_vxlan_offloads(adapter);
4047
4048         be_if_destroy(adapter);
4049
4050         be_clear_queues(adapter);
4051
4052         be_msix_disable(adapter);
4053         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4054         return 0;
4055 }
4056
4057 static int be_vfs_if_create(struct be_adapter *adapter)
4058 {
4059         struct be_resources res = {0};
4060         u32 cap_flags, en_flags, vf;
4061         struct be_vf_cfg *vf_cfg;
4062         int status;
4063
4064         /* If a FW profile exists, then cap_flags are updated */
4065         cap_flags = BE_VF_IF_EN_FLAGS;
4066
4067         for_all_vfs(adapter, vf_cfg, vf) {
4068                 if (!BE3_chip(adapter)) {
4069                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4070                                                            ACTIVE_PROFILE_TYPE,
4071                                                            RESOURCE_LIMITS,
4072                                                            vf + 1);
4073                         if (!status) {
4074                                 cap_flags = res.if_cap_flags;
4075                                 /* Prevent VFs from enabling VLAN promiscuous
4076                                  * mode
4077                                  */
4078                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4079                         }
4080                 }
4081
4082                 /* PF should enable IF flags during proxy if_create call */
4083                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4084                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4085                                           &vf_cfg->if_handle, vf + 1);
4086                 if (status)
4087                         return status;
4088         }
4089
4090         return 0;
4091 }
4092
4093 static int be_vf_setup_init(struct be_adapter *adapter)
4094 {
4095         struct be_vf_cfg *vf_cfg;
4096         int vf;
4097
4098         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4099                                   GFP_KERNEL);
4100         if (!adapter->vf_cfg)
4101                 return -ENOMEM;
4102
4103         for_all_vfs(adapter, vf_cfg, vf) {
4104                 vf_cfg->if_handle = -1;
4105                 vf_cfg->pmac_id = -1;
4106         }
4107         return 0;
4108 }
4109
4110 static int be_vf_setup(struct be_adapter *adapter)
4111 {
4112         struct device *dev = &adapter->pdev->dev;
4113         struct be_vf_cfg *vf_cfg;
4114         int status, old_vfs, vf;
4115         bool spoofchk;
4116
4117         old_vfs = pci_num_vf(adapter->pdev);
4118
4119         status = be_vf_setup_init(adapter);
4120         if (status)
4121                 goto err;
4122
4123         if (old_vfs) {
4124                 for_all_vfs(adapter, vf_cfg, vf) {
4125                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4126                         if (status)
4127                                 goto err;
4128                 }
4129
4130                 status = be_vfs_mac_query(adapter);
4131                 if (status)
4132                         goto err;
4133         } else {
4134                 status = be_vfs_if_create(adapter);
4135                 if (status)
4136                         goto err;
4137
4138                 status = be_vf_eth_addr_config(adapter);
4139                 if (status)
4140                         goto err;
4141         }
4142
4143         for_all_vfs(adapter, vf_cfg, vf) {
4144                 /* Allow VFs to program MAC/VLAN filters */
4145                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4146                                                   vf + 1);
4147                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4148                         status = be_cmd_set_fn_privileges(adapter,
4149                                                           vf_cfg->privileges |
4150                                                           BE_PRIV_FILTMGMT,
4151                                                           vf + 1);
4152                         if (!status) {
4153                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4154                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4155                                          vf);
4156                         }
4157                 }
4158
4159                 /* Allow full available bandwidth */
4160                 if (!old_vfs)
4161                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4162
4163                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4164                                                vf_cfg->if_handle, NULL,
4165                                                &spoofchk);
4166                 if (!status)
4167                         vf_cfg->spoofchk = spoofchk;
4168
4169                 if (!old_vfs) {
4170                         be_cmd_enable_vf(adapter, vf + 1);
4171                         be_cmd_set_logical_link_config(adapter,
4172                                                        IFLA_VF_LINK_STATE_AUTO,
4173                                                        vf+1);
4174                 }
4175         }
4176
4177         if (!old_vfs) {
4178                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4179                 if (status) {
4180                         dev_err(dev, "SRIOV enable failed\n");
4181                         adapter->num_vfs = 0;
4182                         goto err;
4183                 }
4184         }
4185
4186         if (BE3_chip(adapter)) {
4187                 /* On BE3, enable VEB only when SRIOV is enabled */
4188                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4189                                                adapter->if_handle,
4190                                                PORT_FWD_TYPE_VEB, 0);
4191                 if (status)
4192                         goto err;
4193         }
4194
4195         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4196         return 0;
4197 err:
4198         dev_err(dev, "VF setup failed\n");
4199         be_vf_clear(adapter);
4200         return status;
4201 }
4202
4203 /* Converting function_mode bits on BE3 to SH mc_type enums */
4204
4205 static u8 be_convert_mc_type(u32 function_mode)
4206 {
4207         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4208                 return vNIC1;
4209         else if (function_mode & QNQ_MODE)
4210                 return FLEX10;
4211         else if (function_mode & VNIC_MODE)
4212                 return vNIC2;
4213         else if (function_mode & UMC_ENABLED)
4214                 return UMC;
4215         else
4216                 return MC_NONE;
4217 }
4218
4219 /* On BE2/BE3 FW does not suggest the supported limits */
4220 static void BEx_get_resources(struct be_adapter *adapter,
4221                               struct be_resources *res)
4222 {
4223         bool use_sriov = adapter->num_vfs ? 1 : 0;
4224
4225         if (be_physfn(adapter))
4226                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4227         else
4228                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4229
4230         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4231
4232         if (be_is_mc(adapter)) {
4233                 /* Assuming that there are 4 channels per port
4234                  * when multi-channel is enabled
4235                  */
4236                 if (be_is_qnq_mode(adapter))
4237                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4238                 else
4239                         /* In a non-qnq multichannel mode, the pvid
4240                          * takes up one vlan entry
4241                          */
4242                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4243         } else {
4244                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4245         }
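        /* Illustrative values (assuming, purely for illustration, that
         * BE_NUM_VLANS_SUPPORTED is 64): qnq multi-channel would get
         * 64 / 8 = 8 VLAN filters, non-qnq multi-channel 64 / 4 - 1 = 15,
         * and single-channel the full 64.
         */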
4246
4247         res->max_mcast_mac = BE_MAX_MC;
4248
4249         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4250          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4251          *    *only* if it is RSS-capable.
4252          */
4253         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4254             be_virtfn(adapter) ||
4255             (be_is_mc(adapter) &&
4256              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4257                 res->max_tx_qs = 1;
4258         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4259                 struct be_resources super_nic_res = {0};
4260
4261                 /* On a SuperNIC profile, the driver needs to use the
4262                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4263                  */
4264                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4265                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4266                                           0);
4267                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4268                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4269         } else {
4270                 res->max_tx_qs = BE3_MAX_TX_QS;
4271         }
4272
4273         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4274             !use_sriov && be_physfn(adapter))
4275                 res->max_rss_qs = (adapter->be3_native) ?
4276                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4277         res->max_rx_qs = res->max_rss_qs + 1;
4278
4279         if (be_physfn(adapter))
4280                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4281                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4282         else
4283                 res->max_evt_qs = 1;
4284
4285         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4286         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4287         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4288                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4289 }
4290
4291 static void be_setup_init(struct be_adapter *adapter)
4292 {
4293         adapter->vlan_prio_bmap = 0xff;
4294         adapter->phy.link_speed = -1;
4295         adapter->if_handle = -1;
4296         adapter->be3_native = false;
4297         adapter->if_flags = 0;
4298         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4299         if (be_physfn(adapter))
4300                 adapter->cmd_privileges = MAX_PRIVILEGES;
4301         else
4302                 adapter->cmd_privileges = MIN_PRIVILEGES;
4303 }
4304
4305 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4306  * However, this HW limitation is not exposed to the host via any SLI cmd.
4307  * As a result, in the case of SRIOV, and in particular in multi-partition
4308  * configs, the driver needs to calculate a proportional share of RSS Tables
4309  * per PF-pool for distribution among the VFs. This self-imposed limit
4310  * determines the number of VFs for which RSS can be enabled.
4311  */
4312 void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4313 {
4314         struct be_port_resources port_res = {0};
4315         u8 rss_tables_on_port;
4316         u16 max_vfs = be_max_vfs(adapter);
4317
4318         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4319                                   RESOURCE_LIMITS, 0);
4320
4321         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4322
4323         /* Each PF Pool's RSS Tables limit =
4324          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4325          */
4326         adapter->pool_res.max_rss_tables =
4327                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4328 }
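/* A worked example of the proportional split above, using illustrative
 * numbers only (not the actual driver constants): if the port exposes
 * 64 RSS Policy Tables, 4 of them are reserved for the NIC PFs, this PF may
 * own up to 32 VFs and the port supports 128 VFs in total, then this PF pool
 * is limited to 32 * (64 - 4) / 128 = 15 RSS Tables.
 */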
4329
4330 static int be_get_sriov_config(struct be_adapter *adapter)
4331 {
4332         struct be_resources res = {0};
4333         int max_vfs, old_vfs;
4334
4335         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4336                                   RESOURCE_LIMITS, 0);
4337
4338         /* Some old versions of BE3 FW don't report max_vfs value */
4339         if (BE3_chip(adapter) && !res.max_vfs) {
4340                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4341                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4342         }
4343
4344         adapter->pool_res = res;
4345
4346         /* If the VFs were not disabled during the previous unload of the driver,
4347          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4348          * Instead, use the TotalVFs value stored in the pci-dev struct.
4349          */
4350         old_vfs = pci_num_vf(adapter->pdev);
4351         if (old_vfs) {
4352                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4353                          old_vfs);
4354
4355                 adapter->pool_res.max_vfs =
4356                         pci_sriov_get_totalvfs(adapter->pdev);
4357                 adapter->num_vfs = old_vfs;
4358         }
4359
4360         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4361                 be_calculate_pf_pool_rss_tables(adapter);
4362                 dev_info(&adapter->pdev->dev,
4363                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4364                          be_max_pf_pool_rss_tables(adapter));
4365         }
4366         return 0;
4367 }
4368
4369 static void be_alloc_sriov_res(struct be_adapter *adapter)
4370 {
4371         int old_vfs = pci_num_vf(adapter->pdev);
4372         struct  be_resources vft_res = {0};
4373         int status;
4374
4375         be_get_sriov_config(adapter);
4376
4377         if (!old_vfs)
4378                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4379
4380         /* When the HW is in SRIOV capable configuration, the PF-pool
4381          * resources are given to the PF during driver load if there are no
4382          * old VFs. This facility is not available in BE3 FW.
4383          * On the Lancer chip, this is done by the FW.
4384          */
4385         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4386                 be_calculate_vf_res(adapter, 0, &vft_res);
4387                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4388                                                  &vft_res);
4389                 if (status)
4390                         dev_err(&adapter->pdev->dev,
4391                                 "Failed to optimize SRIOV resources\n");
4392         }
4393 }
4394
4395 static int be_get_resources(struct be_adapter *adapter)
4396 {
4397         struct device *dev = &adapter->pdev->dev;
4398         struct be_resources res = {0};
4399         int status;
4400
4401         /* For Lancer, SH, etc., read per-function resource limits from FW.
4402          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4403          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4404          */
4405         if (BEx_chip(adapter)) {
4406                 BEx_get_resources(adapter, &res);
4407         } else {
4408                 status = be_cmd_get_func_config(adapter, &res);
4409                 if (status)
4410                         return status;
4411
4412                 /* If a default RXQ must be created, we'll use up one RSSQ */
4413                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4414                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4415                         res.max_rss_qs -= 1;
4416         }
4417
4418         /* If RoCE is supported, stash away half the EQs for RoCE */
4419         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4420                                 res.max_evt_qs / 2 : res.max_evt_qs;
4421         adapter->res = res;
4422
4423         /* If FW supports an RSS default queue, then skip creating a non-RSS
4424          * queue for non-IP traffic.
4425          */
4426         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4427                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4428
4429         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4430                  be_max_txqs(adapter), be_max_rxqs(adapter),
4431                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4432                  be_max_vfs(adapter));
4433         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4434                  be_max_uc(adapter), be_max_mc(adapter),
4435                  be_max_vlans(adapter));
4436
4437         /* Ensure RX and TX queues are created in pairs at init time */
4438         adapter->cfg_num_rx_irqs =
4439                                 min_t(u16, netif_get_num_default_rss_queues(),
4440                                       be_max_qp_irqs(adapter));
4441         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4442         return 0;
4443 }
4444
4445 static int be_get_config(struct be_adapter *adapter)
4446 {
4447         int status, level;
4448         u16 profile_id;
4449
4450         status = be_cmd_get_cntl_attributes(adapter);
4451         if (status)
4452                 return status;
4453
4454         status = be_cmd_query_fw_cfg(adapter);
4455         if (status)
4456                 return status;
4457
4458         if (!lancer_chip(adapter) && be_physfn(adapter))
4459                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4460
4461         if (BEx_chip(adapter)) {
4462                 level = be_cmd_get_fw_log_level(adapter);
4463                 adapter->msg_enable =
4464                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4465         }
4466
4467         be_cmd_get_acpi_wol_cap(adapter);
4468         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4469         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4470
4471         be_cmd_query_port_name(adapter);
4472
4473         if (be_physfn(adapter)) {
4474                 status = be_cmd_get_active_profile(adapter, &profile_id);
4475                 if (!status)
4476                         dev_info(&adapter->pdev->dev,
4477                                  "Using profile 0x%x\n", profile_id);
4478         }
4479
4480         return 0;
4481 }
4482
4483 static int be_mac_setup(struct be_adapter *adapter)
4484 {
4485         u8 mac[ETH_ALEN];
4486         int status;
4487
4488         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4489                 status = be_cmd_get_perm_mac(adapter, mac);
4490                 if (status)
4491                         return status;
4492
4493                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4494                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4495         }
4496
4497         return 0;
4498 }
4499
4500 static void be_schedule_worker(struct be_adapter *adapter)
4501 {
4502         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4503         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4504 }
4505
4506 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4507 {
4508         schedule_delayed_work(&adapter->be_err_detection_work,
4509                               msecs_to_jiffies(delay));
4510         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4511 }
4512
4513 static int be_setup_queues(struct be_adapter *adapter)
4514 {
4515         struct net_device *netdev = adapter->netdev;
4516         int status;
4517
4518         status = be_evt_queues_create(adapter);
4519         if (status)
4520                 goto err;
4521
4522         status = be_tx_qs_create(adapter);
4523         if (status)
4524                 goto err;
4525
4526         status = be_rx_cqs_create(adapter);
4527         if (status)
4528                 goto err;
4529
4530         status = be_mcc_queues_create(adapter);
4531         if (status)
4532                 goto err;
4533
4534         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4535         if (status)
4536                 goto err;
4537
4538         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4539         if (status)
4540                 goto err;
4541
4542         return 0;
4543 err:
4544         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4545         return status;
4546 }
4547
4548 static int be_if_create(struct be_adapter *adapter)
4549 {
4550         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4551         u32 cap_flags = be_if_cap_flags(adapter);
4552         int status;
4553
4554         /* alloc required memory for other filtering fields */
4555         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4556                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4557         if (!adapter->pmac_id)
4558                 return -ENOMEM;
4559
4560         adapter->mc_list = kcalloc(be_max_mc(adapter),
4561                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4562         if (!adapter->mc_list)
4563                 return -ENOMEM;
4564
4565         adapter->uc_list = kcalloc(be_max_uc(adapter),
4566                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4567         if (!adapter->uc_list)
4568                 return -ENOMEM;
4569
4570         if (adapter->cfg_num_rx_irqs == 1)
4571                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4572
4573         en_flags &= cap_flags;
4574         /* will enable all the needed filter flags in be_open() */
4575         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4576                                   &adapter->if_handle, 0);
4577
4578         if (status)
4579                 return status;
4580
4581         return 0;
4582 }
4583
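/* Tear down the existing interface, queues and (when they are not shared
 * with RoCE) the MSI-X vectors, then re-create them with the current
 * configuration. The netdev is closed and re-opened around the operation
 * if it was running.
 */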
4584 int be_update_queues(struct be_adapter *adapter)
4585 {
4586         struct net_device *netdev = adapter->netdev;
4587         int status;
4588
4589         if (netif_running(netdev))
4590                 be_close(netdev);
4591
4592         be_cancel_worker(adapter);
4593
4594         /* If any vectors have been shared with RoCE, we cannot re-program
4595          * the MSIx table.
4596          */
4597         if (!adapter->num_msix_roce_vec)
4598                 be_msix_disable(adapter);
4599
4600         be_clear_queues(adapter);
4601         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4602         if (status)
4603                 return status;
4604
4605         if (!msix_enabled(adapter)) {
4606                 status = be_msix_enable(adapter);
4607                 if (status)
4608                         return status;
4609         }
4610
4611         status = be_if_create(adapter);
4612         if (status)
4613                 return status;
4614
4615         status = be_setup_queues(adapter);
4616         if (status)
4617                 return status;
4618
4619         be_schedule_worker(adapter);
4620
4621         if (netif_running(netdev))
4622                 status = be_open(netdev);
4623
4624         return status;
4625 }
4626
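/* Parse the major number out of a dotted FW version string; for example,
 * "4.6.62.0" yields 4. A string that does not begin with a number yields 0.
 */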
4627 static inline int fw_major_num(const char *fw_ver)
4628 {
4629         int fw_major = 0, i;
4630
4631         i = sscanf(fw_ver, "%d.", &fw_major);
4632         if (i != 1)
4633                 return 0;
4634
4635         return fw_major;
4636 }
4637
4638 /* If any VFs are already enabled, don't FLR the PF */
4639 static bool be_reset_required(struct be_adapter *adapter)
4640 {
4641         return pci_num_vf(adapter->pdev) ? false : true;
4642 }
4643
4644 /* Wait for the FW to be ready and perform the required initialization */
4645 static int be_func_init(struct be_adapter *adapter)
4646 {
4647         int status;
4648
4649         status = be_fw_wait_ready(adapter);
4650         if (status)
4651                 return status;
4652
4653         if (be_reset_required(adapter)) {
4654                 status = be_cmd_reset_function(adapter);
4655                 if (status)
4656                         return status;
4657
4658                 /* Wait for interrupts to quiesce after an FLR */
4659                 msleep(100);
4660
4661                 /* We can clear all errors when function reset succeeds */
4662                 be_clear_error(adapter, BE_CLEAR_ALL);
4663         }
4664
4665         /* Tell FW we're ready to fire cmds */
4666         status = be_cmd_fw_init(adapter);
4667         if (status)
4668                 return status;
4669
4670         /* Allow interrupts for other ULPs running on NIC function */
4671         be_intr_set(adapter, true);
4672
4673         return 0;
4674 }
4675
4676 static int be_setup(struct be_adapter *adapter)
4677 {
4678         struct device *dev = &adapter->pdev->dev;
4679         int status;
4680
4681         status = be_func_init(adapter);
4682         if (status)
4683                 return status;
4684
4685         be_setup_init(adapter);
4686
4687         if (!lancer_chip(adapter))
4688                 be_cmd_req_native_mode(adapter);
4689
4690         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4691          * for issuing profile-related cmds
4692          */
4693         if (!BEx_chip(adapter)) {
4694                 status = be_cmd_get_func_config(adapter, NULL);
4695                 if (status)
4696                         return status;
4697         }
4698
4699         status = be_get_config(adapter);
4700         if (status)
4701                 goto err;
4702
4703         if (!BE2_chip(adapter) && be_physfn(adapter))
4704                 be_alloc_sriov_res(adapter);
4705
4706         status = be_get_resources(adapter);
4707         if (status)
4708                 goto err;
4709
4710         status = be_msix_enable(adapter);
4711         if (status)
4712                 goto err;
4713
4714         /* will enable all the needed filter flags in be_open() */
4715         status = be_if_create(adapter);
4716         if (status)
4717                 goto err;
4718
4719         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4720         rtnl_lock();
4721         status = be_setup_queues(adapter);
4722         rtnl_unlock();
4723         if (status)
4724                 goto err;
4725
4726         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4727
4728         status = be_mac_setup(adapter);
4729         if (status)
4730                 goto err;
4731
4732         be_cmd_get_fw_ver(adapter);
4733         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4734
4735         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4736                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4737                         adapter->fw_ver);
4738                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4739         }
4740
4741         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4742                                          adapter->rx_fc);
4743         if (status)
4744                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4745                                         &adapter->rx_fc);
4746
4747         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4748                  adapter->tx_fc, adapter->rx_fc);
4749
4750         if (be_physfn(adapter))
4751                 be_cmd_set_logical_link_config(adapter,
4752                                                IFLA_VF_LINK_STATE_AUTO, 0);
4753
4754         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4755          * confusing a Linux bridge or OVS that it might be connected to.
4756          * When SRIOV is not enabled, set the EVB to PASSTHRU mode, which
4757          * effectively disables the EVB.
4758          */
4759         if (BE3_chip(adapter))
4760                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4761                                       PORT_FWD_TYPE_PASSTHRU, 0);
4762
4763         if (adapter->num_vfs)
4764                 be_vf_setup(adapter);
4765
4766         status = be_cmd_get_phy_info(adapter);
4767         if (!status && be_pause_supported(adapter))
4768                 adapter->phy.fc_autoneg = 1;
4769
4770         be_schedule_worker(adapter);
4771         adapter->flags |= BE_FLAGS_SETUP_DONE;
4772         return 0;
4773 err:
4774         be_clear(adapter);
4775         return status;
4776 }
4777
4778 #ifdef CONFIG_NET_POLL_CONTROLLER
4779 static void be_netpoll(struct net_device *netdev)
4780 {
4781         struct be_adapter *adapter = netdev_priv(netdev);
4782         struct be_eq_obj *eqo;
4783         int i;
4784
4785         for_all_evt_queues(adapter, eqo, i) {
4786                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4787                 napi_schedule(&eqo->napi);
4788         }
4789 }
4790 #endif
4791
4792 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4793 {
4794         const struct firmware *fw;
4795         int status;
4796
4797         if (!netif_running(adapter->netdev)) {
4798                 dev_err(&adapter->pdev->dev,
4799                         "Firmware load not allowed (interface is down)\n");
4800                 return -ENETDOWN;
4801         }
4802
4803         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4804         if (status)
4805                 goto fw_exit;
4806
4807         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4808
4809         if (lancer_chip(adapter))
4810                 status = lancer_fw_download(adapter, fw);
4811         else
4812                 status = be_fw_download(adapter, fw);
4813
4814         if (!status)
4815                 be_cmd_get_fw_ver(adapter);
4816
4817 fw_exit:
4818         release_firmware(fw);
4819         return status;
4820 }
4821
4822 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4823                                  u16 flags)
4824 {
4825         struct be_adapter *adapter = netdev_priv(dev);
4826         struct nlattr *attr, *br_spec;
4827         int rem;
4828         int status = 0;
4829         u16 mode = 0;
4830
4831         if (!sriov_enabled(adapter))
4832                 return -EOPNOTSUPP;
4833
4834         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4835         if (!br_spec)
4836                 return -EINVAL;
4837
4838         nla_for_each_nested(attr, br_spec, rem) {
4839                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4840                         continue;
4841
4842                 if (nla_len(attr) < sizeof(mode))
4843                         return -EINVAL;
4844
4845                 mode = nla_get_u16(attr);
4846                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4847                         return -EOPNOTSUPP;
4848
4849                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4850                         return -EINVAL;
4851
4852                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4853                                                adapter->if_handle,
4854                                                mode == BRIDGE_MODE_VEPA ?
4855                                                PORT_FWD_TYPE_VEPA :
4856                                                PORT_FWD_TYPE_VEB, 0);
4857                 if (status)
4858                         goto err;
4859
4860                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4861                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4862
4863                 return status;
4864         }
4865 err:
4866         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4867                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4868
4869         return status;
4870 }
4871
4872 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4873                                  struct net_device *dev, u32 filter_mask,
4874                                  int nlflags)
4875 {
4876         struct be_adapter *adapter = netdev_priv(dev);
4877         int status = 0;
4878         u8 hsw_mode;
4879
4880         /* BE and Lancer chips support VEB mode only */
4881         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4882                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4883                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4884                         return 0;
4885                 hsw_mode = PORT_FWD_TYPE_VEB;
4886         } else {
4887                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4888                                                adapter->if_handle, &hsw_mode,
4889                                                NULL);
4890                 if (status)
4891                         return 0;
4892
4893                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4894                         return 0;
4895         }
4896
4897         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4898                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4899                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4900                                        0, 0, nlflags, filter_mask, NULL);
4901 }
4902
4903 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4904                                          void (*func)(struct work_struct *))
4905 {
4906         struct be_cmd_work *work;
4907
4908         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4909         if (!work) {
4910                 dev_err(&adapter->pdev->dev,
4911                         "be_work memory allocation failed\n");
4912                 return NULL;
4913         }
4914
4915         INIT_WORK(&work->work, func);
4916         work->adapter = adapter;
4917         return work;
4918 }
4919
4920 /* VxLAN offload Notes:
4921  *
4922  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4923  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4924  * is expected to work across all types of IP tunnels once exported. Skyhawk
4925  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4926  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4927  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4928  * those other tunnels are unexported on the fly through ndo_features_check().
4929  *
4930  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4931  * adds more than one port, disable offloads and don't re-enable them
4932  * until all the tunnels have been removed.
4933  */
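/* A short summary of the state used by the two work handlers below:
 * adapter->vxlan_port holds the single offloaded UDP dport,
 * vxlan_port_count tracks the VxLAN ports added by the stack, and
 * vxlan_port_aliases counts repeated adds of the already-offloaded port so
 * that offloads are not torn down until a matching delete arrives.
 */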
4934 static void be_work_add_vxlan_port(struct work_struct *work)
4935 {
4936         struct be_cmd_work *cmd_work =
4937                                 container_of(work, struct be_cmd_work, work);
4938         struct be_adapter *adapter = cmd_work->adapter;
4939         struct net_device *netdev = adapter->netdev;
4940         struct device *dev = &adapter->pdev->dev;
4941         __be16 port = cmd_work->info.vxlan_port;
4942         int status;
4943
4944         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4945                 adapter->vxlan_port_aliases++;
4946                 goto done;
4947         }
4948
4949         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4950                 dev_info(dev,
4951                          "Only one UDP port supported for VxLAN offloads\n");
4952                 dev_info(dev, "Disabling VxLAN offloads\n");
4953                 adapter->vxlan_port_count++;
4954                 goto err;
4955         }
4956
4957         if (adapter->vxlan_port_count++ >= 1)
4958                 goto done;
4959
4960         status = be_cmd_manage_iface(adapter, adapter->if_handle,
4961                                      OP_CONVERT_NORMAL_TO_TUNNEL);
4962         if (status) {
4963                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4964                 goto err;
4965         }
4966
4967         status = be_cmd_set_vxlan_port(adapter, port);
4968         if (status) {
4969                 dev_warn(dev, "Failed to add VxLAN port\n");
4970                 goto err;
4971         }
4972         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4973         adapter->vxlan_port = port;
4974
4975         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4976                                    NETIF_F_TSO | NETIF_F_TSO6 |
4977                                    NETIF_F_GSO_UDP_TUNNEL;
4978         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4979         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4980
4981         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4982                  be16_to_cpu(port));
4983         goto done;
4984 err:
4985         be_disable_vxlan_offloads(adapter);
4986 done:
4987         kfree(cmd_work);
4988 }
4989
4990 static void be_work_del_vxlan_port(struct work_struct *work)
4991 {
4992         struct be_cmd_work *cmd_work =
4993                                 container_of(work, struct be_cmd_work, work);
4994         struct be_adapter *adapter = cmd_work->adapter;
4995         __be16 port = cmd_work->info.vxlan_port;
4996
4997         if (adapter->vxlan_port != port)
4998                 goto done;
4999
5000         if (adapter->vxlan_port_aliases) {
5001                 adapter->vxlan_port_aliases--;
5002                 goto out;
5003         }
5004
5005         be_disable_vxlan_offloads(adapter);
5006
5007         dev_info(&adapter->pdev->dev,
5008                  "Disabled VxLAN offloads for UDP port %d\n",
5009                  be16_to_cpu(port));
5010 done:
5011         adapter->vxlan_port_count--;
5012 out:
5013         kfree(cmd_work);
5014 }
5015
5016 static void be_cfg_vxlan_port(struct net_device *netdev,
5017                               struct udp_tunnel_info *ti,
5018                               void (*func)(struct work_struct *))
5019 {
5020         struct be_adapter *adapter = netdev_priv(netdev);
5021         struct be_cmd_work *cmd_work;
5022
5023         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5024                 return;
5025
5026         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5027                 return;
5028
5029         cmd_work = be_alloc_work(adapter, func);
5030         if (cmd_work) {
5031                 cmd_work->info.vxlan_port = ti->port;
5032                 queue_work(be_wq, &cmd_work->work);
5033         }
5034 }
5035
5036 static void be_del_vxlan_port(struct net_device *netdev,
5037                               struct udp_tunnel_info *ti)
5038 {
5039         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5040 }
5041
5042 static void be_add_vxlan_port(struct net_device *netdev,
5043                               struct udp_tunnel_info *ti)
5044 {
5045         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5046 }
5047
5048 static netdev_features_t be_features_check(struct sk_buff *skb,
5049                                            struct net_device *dev,
5050                                            netdev_features_t features)
5051 {
5052         struct be_adapter *adapter = netdev_priv(dev);
5053         u8 l4_hdr = 0;
5054
5055         /* The code below restricts offload features for some tunneled packets.
5056          * Offload features for normal (non tunnel) packets are unchanged.
5057          */
5058         if (!skb->encapsulation ||
5059             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5060                 return features;
5061
5062         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5063          * should disable tunnel offload features if it's not a VxLAN packet,
5064          * as tunnel offloads have been enabled only for VxLAN. This is done to
5065          * allow other tunneled traffic like GRE work fine while VxLAN
5066          * offloads are configured in Skyhawk-R.
5067          */
5068         switch (vlan_get_protocol(skb)) {
5069         case htons(ETH_P_IP):
5070                 l4_hdr = ip_hdr(skb)->protocol;
5071                 break;
5072         case htons(ETH_P_IPV6):
5073                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5074                 break;
5075         default:
5076                 return features;
5077         }
5078
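        /* For the packet to qualify as VxLAN, the inner Ethernet header must
         * start exactly sizeof(struct udphdr) + sizeof(struct vxlanhdr)
         * (8 + 8 = 16) bytes past the outer transport header.
         */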
5079         if (l4_hdr != IPPROTO_UDP ||
5080             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5081             skb->inner_protocol != htons(ETH_P_TEB) ||
5082             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5083             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
5084                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5085
5086         return features;
5087 }
5088
5089 static int be_get_phys_port_id(struct net_device *dev,
5090                                struct netdev_phys_item_id *ppid)
5091 {
5092         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5093         struct be_adapter *adapter = netdev_priv(dev);
5094         u8 *id;
5095
5096         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5097                 return -ENOSPC;
5098
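        /* The phys port id is the 1-based HBA port number followed by the
         * controller serial number, copied out word by word in reverse order.
         */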
5099         ppid->id[0] = adapter->hba_port_num + 1;
5100         id = &ppid->id[1];
5101         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5102              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5103                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5104
5105         ppid->id_len = id_len;
5106
5107         return 0;
5108 }
5109
5110 static void be_set_rx_mode(struct net_device *dev)
5111 {
5112         struct be_adapter *adapter = netdev_priv(dev);
5113         struct be_cmd_work *work;
5114
5115         work = be_alloc_work(adapter, be_work_set_rx_mode);
5116         if (work)
5117                 queue_work(be_wq, &work->work);
5118 }
5119
5120 static const struct net_device_ops be_netdev_ops = {
5121         .ndo_open               = be_open,
5122         .ndo_stop               = be_close,
5123         .ndo_start_xmit         = be_xmit,
5124         .ndo_set_rx_mode        = be_set_rx_mode,
5125         .ndo_set_mac_address    = be_mac_addr_set,
5126         .ndo_change_mtu         = be_change_mtu,
5127         .ndo_get_stats64        = be_get_stats64,
5128         .ndo_validate_addr      = eth_validate_addr,
5129         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5130         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5131         .ndo_set_vf_mac         = be_set_vf_mac,
5132         .ndo_set_vf_vlan        = be_set_vf_vlan,
5133         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5134         .ndo_get_vf_config      = be_get_vf_config,
5135         .ndo_set_vf_link_state  = be_set_vf_link_state,
5136         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5137 #ifdef CONFIG_NET_POLL_CONTROLLER
5138         .ndo_poll_controller    = be_netpoll,
5139 #endif
5140         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5141         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5142 #ifdef CONFIG_NET_RX_BUSY_POLL
5143         .ndo_busy_poll          = be_busy_poll,
5144 #endif
5145         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5146         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5147         .ndo_features_check     = be_features_check,
5148         .ndo_get_phys_port_id   = be_get_phys_port_id,
5149 };
5150
5151 static void be_netdev_init(struct net_device *netdev)
5152 {
5153         struct be_adapter *adapter = netdev_priv(netdev);
5154
5155         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5156                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5157                 NETIF_F_HW_VLAN_CTAG_TX;
5158         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5159                 netdev->hw_features |= NETIF_F_RXHASH;
5160
5161         netdev->features |= netdev->hw_features |
5162                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5163
5164         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5165                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5166
5167         netdev->priv_flags |= IFF_UNICAST_FLT;
5168
5169         netdev->flags |= IFF_MULTICAST;
5170
5171         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5172
5173         netdev->netdev_ops = &be_netdev_ops;
5174
5175         netdev->ethtool_ops = &be_ethtool_ops;
5176 }
5177
5178 static void be_cleanup(struct be_adapter *adapter)
5179 {
5180         struct net_device *netdev = adapter->netdev;
5181
5182         rtnl_lock();
5183         netif_device_detach(netdev);
5184         if (netif_running(netdev))
5185                 be_close(netdev);
5186         rtnl_unlock();
5187
5188         be_clear(adapter);
5189 }
5190
5191 static int be_resume(struct be_adapter *adapter)
5192 {
5193         struct net_device *netdev = adapter->netdev;
5194         int status;
5195
5196         status = be_setup(adapter);
5197         if (status)
5198                 return status;
5199
5200         rtnl_lock();
5201         if (netif_running(netdev))
5202                 status = be_open(netdev);
5203         rtnl_unlock();
5204
5205         if (status)
5206                 return status;
5207
5208         netif_device_attach(netdev);
5209
5210         return 0;
5211 }
5212
5213 static int be_err_recover(struct be_adapter *adapter)
5214 {
5215         int status;
5216
5217         /* Error recovery is supported only on Lancer as of now */
5218         if (!lancer_chip(adapter))
5219                 return -EIO;
5220
5221         /* Wait for the adapter to reach a quiescent state before
5222          * destroying queues
5223          */
5224         status = be_fw_wait_ready(adapter);
5225         if (status)
5226                 goto err;
5227
5228         be_cleanup(adapter);
5229
5230         status = be_resume(adapter);
5231         if (status)
5232                 goto err;
5233
5234         return 0;
5235 err:
5236         return status;
5237 }
5238
5239 static void be_err_detection_task(struct work_struct *work)
5240 {
5241         struct be_adapter *adapter =
5242                                 container_of(work, struct be_adapter,
5243                                              be_err_detection_work.work);
5244         struct device *dev = &adapter->pdev->dev;
5245         int recovery_status;
5246         int delay = ERR_DETECTION_DELAY;
5247
5248         be_detect_error(adapter);
5249
5250         if (be_check_error(adapter, BE_ERROR_HW))
5251                 recovery_status = be_err_recover(adapter);
5252         else
5253                 goto reschedule_task;
5254
5255         if (!recovery_status) {
5256                 adapter->recovery_retries = 0;
5257                 dev_info(dev, "Adapter recovery successful\n");
5258                 goto reschedule_task;
5259         } else if (be_virtfn(adapter)) {
5260                 /* For VFs, check every second whether the PF has
5261                  * allocated resources.
5262                  */
5263                 dev_err(dev, "Re-trying adapter recovery\n");
5264                 goto reschedule_task;
5265         } else if (adapter->recovery_retries++ <
5266                    MAX_ERR_RECOVERY_RETRY_COUNT) {
5267                 /* In case of another error during recovery, it takes 30 sec
5268                  * for adapter to come out of error. Retry error recovery after
5269                  * this time interval.
5270                  */
5271                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5272                 delay = ERR_RECOVERY_RETRY_DELAY;
5273                 goto reschedule_task;
5274         } else {
5275                 dev_err(dev, "Adapter recovery failed\n");
5276         }
5277
5278         return;
5279 reschedule_task:
5280         be_schedule_err_detection(adapter, delay);
5281 }
5282
5283 static void be_log_sfp_info(struct be_adapter *adapter)
5284 {
5285         int status;
5286
5287         status = be_cmd_query_sfp_info(adapter);
5288         if (!status) {
5289                 dev_err(&adapter->pdev->dev,
5290                         "Port %c: %s Vendor: %s part no: %s\n",
5291                         adapter->port_name,
5292                         be_misconfig_evt_port_state[adapter->phy_state],
5293                         adapter->phy.vendor_name,
5294                         adapter->phy.vendor_pn);
5295         }
5296         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5297 }
5298
5299 static void be_worker(struct work_struct *work)
5300 {
5301         struct be_adapter *adapter =
5302                 container_of(work, struct be_adapter, work.work);
5303         struct be_rx_obj *rxo;
5304         int i;
5305
5306         if (be_physfn(adapter) &&
5307             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5308                 be_cmd_get_die_temperature(adapter);
5309
5310         /* when interrupts are not yet enabled, just reap any pending
5311          * mcc completions
5312          */
5313         if (!netif_running(adapter->netdev)) {
5314                 local_bh_disable();
5315                 be_process_mcc(adapter);
5316                 local_bh_enable();
5317                 goto reschedule;
5318         }
5319
5320         if (!adapter->stats_cmd_sent) {
5321                 if (lancer_chip(adapter))
5322                         lancer_cmd_get_pport_stats(adapter,
5323                                                    &adapter->stats_cmd);
5324                 else
5325                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5326         }
5327
5328         for_all_rx_queues(adapter, rxo, i) {
5329                 /* Replenish RX-queues starved due to memory
5330                  * allocation failures.
5331                  */
5332                 if (rxo->rx_post_starved)
5333                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5334         }
5335
5336         /* EQ-delay update for Skyhawk is done while notifying EQ */
5337         if (!skyhawk_chip(adapter))
5338                 be_eqd_update(adapter, false);
5339
5340         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5341                 be_log_sfp_info(adapter);
5342
5343 reschedule:
5344         adapter->work_counter++;
5345         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5346 }
5347
5348 static void be_unmap_pci_bars(struct be_adapter *adapter)
5349 {
5350         if (adapter->csr)
5351                 pci_iounmap(adapter->pdev, adapter->csr);
5352         if (adapter->db)
5353                 pci_iounmap(adapter->pdev, adapter->db);
5354         if (adapter->pcicfg && adapter->pcicfg_mapped)
5355                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5356 }
5357
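/* Doorbell BAR selection: Lancer and VFs expose the doorbells in BAR 0,
 * while BE2/BE3/Skyhawk PFs use BAR 4.
 */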
5358 static int db_bar(struct be_adapter *adapter)
5359 {
5360         if (lancer_chip(adapter) || be_virtfn(adapter))
5361                 return 0;
5362         else
5363                 return 4;
5364 }
5365
5366 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5367 {
5368         if (skyhawk_chip(adapter)) {
5369                 adapter->roce_db.size = 4096;
5370                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5371                                                               db_bar(adapter));
5372                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5373                                                                db_bar(adapter));
5374         }
5375         return 0;
5376 }
5377
5378 static int be_map_pci_bars(struct be_adapter *adapter)
5379 {
5380         struct pci_dev *pdev = adapter->pdev;
5381         u8 __iomem *addr;
5382         u32 sli_intf;
5383
5384         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5385         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5386                                 SLI_INTF_FAMILY_SHIFT;
5387         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5388
5389         if (BEx_chip(adapter) && be_physfn(adapter)) {
5390                 adapter->csr = pci_iomap(pdev, 2, 0);
5391                 if (!adapter->csr)
5392                         return -ENOMEM;
5393         }
5394
5395         addr = pci_iomap(pdev, db_bar(adapter), 0);
5396         if (!addr)
5397                 goto pci_map_err;
5398         adapter->db = addr;
5399
5400         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5401                 if (be_physfn(adapter)) {
5402                         /* PCICFG is the 2nd BAR in BE2 */
5403                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5404                         if (!addr)
5405                                 goto pci_map_err;
5406                         adapter->pcicfg = addr;
5407                         adapter->pcicfg_mapped = true;
5408                 } else {
5409                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5410                         adapter->pcicfg_mapped = false;
5411                 }
5412         }
5413
5414         be_roce_map_pci_bars(adapter);
5415         return 0;
5416
5417 pci_map_err:
5418         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5419         be_unmap_pci_bars(adapter);
5420         return -ENOMEM;
5421 }
5422
5423 static void be_drv_cleanup(struct be_adapter *adapter)
5424 {
5425         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5426         struct device *dev = &adapter->pdev->dev;
5427
5428         if (mem->va)
5429                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5430
5431         mem = &adapter->rx_filter;
5432         if (mem->va)
5433                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5434
5435         mem = &adapter->stats_cmd;
5436         if (mem->va)
5437                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5438 }
5439
5440 /* Allocate and initialize various fields in be_adapter struct */
5441 static int be_drv_init(struct be_adapter *adapter)
5442 {
5443         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5444         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5445         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5446         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5447         struct device *dev = &adapter->pdev->dev;
5448         int status = 0;
5449
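        /* The mailbox is over-allocated by 16 bytes so that the region
         * actually used for FW cmds can be aligned to a 16-byte boundary
         * (see the PTR_ALIGN calls below).
         */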
5450         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5451         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5452                                                  &mbox_mem_alloc->dma,
5453                                                  GFP_KERNEL);
5454         if (!mbox_mem_alloc->va)
5455                 return -ENOMEM;
5456
5457         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5458         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5459         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5460
5461         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5462         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5463                                             &rx_filter->dma, GFP_KERNEL);
5464         if (!rx_filter->va) {
5465                 status = -ENOMEM;
5466                 goto free_mbox;
5467         }
5468
5469         if (lancer_chip(adapter))
5470                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5471         else if (BE2_chip(adapter))
5472                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5473         else if (BE3_chip(adapter))
5474                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5475         else
5476                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5477         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5478                                             &stats_cmd->dma, GFP_KERNEL);
5479         if (!stats_cmd->va) {
5480                 status = -ENOMEM;
5481                 goto free_rx_filter;
5482         }
5483
5484         mutex_init(&adapter->mbox_lock);
5485         mutex_init(&adapter->mcc_lock);
5486         mutex_init(&adapter->rx_filter_lock);
5487         spin_lock_init(&adapter->mcc_cq_lock);
5488         init_completion(&adapter->et_cmd_compl);
5489
5490         pci_save_state(adapter->pdev);
5491
5492         INIT_DELAYED_WORK(&adapter->work, be_worker);
5493         INIT_DELAYED_WORK(&adapter->be_err_detection_work,
5494                           be_err_detection_task);
5495
5496         adapter->rx_fc = true;
5497         adapter->tx_fc = true;
5498
5499         /* Must be a power of 2 or else MODULO will BUG_ON */
5500         adapter->be_get_temp_freq = 64;
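        /* The worker runs roughly once a second, so a frequency of 64 means
         * the die temperature is queried about once a minute.
         */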
5501
5502         return 0;
5503
5504 free_rx_filter:
5505         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5506 free_mbox:
5507         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5508                           mbox_mem_alloc->dma);
5509         return status;
5510 }
5511
5512 static void be_remove(struct pci_dev *pdev)
5513 {
5514         struct be_adapter *adapter = pci_get_drvdata(pdev);
5515
5516         if (!adapter)
5517                 return;
5518
5519         be_roce_dev_remove(adapter);
5520         be_intr_set(adapter, false);
5521
5522         be_cancel_err_detection(adapter);
5523
5524         unregister_netdev(adapter->netdev);
5525
5526         be_clear(adapter);
5527
5528         /* tell fw we're done with firing cmds */
5529         be_cmd_fw_clean(adapter);
5530
5531         be_unmap_pci_bars(adapter);
5532         be_drv_cleanup(adapter);
5533
5534         pci_disable_pcie_error_reporting(pdev);
5535
5536         pci_release_regions(pdev);
5537         pci_disable_device(pdev);
5538
5539         free_netdev(adapter->netdev);
5540 }
5541
5542 static ssize_t be_hwmon_show_temp(struct device *dev,
5543                                   struct device_attribute *dev_attr,
5544                                   char *buf)
5545 {
5546         struct be_adapter *adapter = dev_get_drvdata(dev);
5547
5548         /* Unit: millidegree Celsius */
5549         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5550                 return -EIO;
5551         else
5552                 return sprintf(buf, "%u\n",
5553                                adapter->hwmon_info.be_on_die_temp * 1000);
5554 }
5555
5556 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5557                           be_hwmon_show_temp, NULL, 1);
5558
5559 static struct attribute *be_hwmon_attrs[] = {
5560         &sensor_dev_attr_temp1_input.dev_attr.attr,
5561         NULL
5562 };
5563
5564 ATTRIBUTE_GROUPS(be_hwmon);
5565
5566 static char *mc_name(struct be_adapter *adapter)
5567 {
5568         char *str = ""; /* default */
5569
5570         switch (adapter->mc_type) {
5571         case UMC:
5572                 str = "UMC";
5573                 break;
5574         case FLEX10:
5575                 str = "FLEX10";
5576                 break;
5577         case vNIC1:
5578                 str = "vNIC-1";
5579                 break;
5580         case nPAR:
5581                 str = "nPAR";
5582                 break;
5583         case UFP:
5584                 str = "UFP";
5585                 break;
5586         case vNIC2:
5587                 str = "vNIC-2";
5588                 break;
5589         default:
5590                 str = "";
5591         }
5592
5593         return str;
5594 }
5595
5596 static inline char *func_name(struct be_adapter *adapter)
5597 {
5598         return be_physfn(adapter) ? "PF" : "VF";
5599 }
5600
5601 static inline char *nic_name(struct pci_dev *pdev)
5602 {
5603         switch (pdev->device) {
5604         case OC_DEVICE_ID1:
5605                 return OC_NAME;
5606         case OC_DEVICE_ID2:
5607                 return OC_NAME_BE;
5608         case OC_DEVICE_ID3:
5609         case OC_DEVICE_ID4:
5610                 return OC_NAME_LANCER;
5611         case BE_DEVICE_ID2:
5612                 return BE3_NAME;
5613         case OC_DEVICE_ID5:
5614         case OC_DEVICE_ID6:
5615                 return OC_NAME_SH;
5616         default:
5617                 return BE_NAME;
5618         }
5619 }
5620
5621 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5622 {
5623         struct be_adapter *adapter;
5624         struct net_device *netdev;
5625         int status = 0;
5626
5627         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5628
5629         status = pci_enable_device(pdev);
5630         if (status)
5631                 goto do_none;
5632
5633         status = pci_request_regions(pdev, DRV_NAME);
5634         if (status)
5635                 goto disable_dev;
5636         pci_set_master(pdev);
5637
5638         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5639         if (!netdev) {
5640                 status = -ENOMEM;
5641                 goto rel_reg;
5642         }
5643         adapter = netdev_priv(netdev);
5644         adapter->pdev = pdev;
5645         pci_set_drvdata(pdev, adapter);
5646         adapter->netdev = netdev;
5647         SET_NETDEV_DEV(netdev, &pdev->dev);
5648
5649         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5650         if (!status) {
5651                 netdev->features |= NETIF_F_HIGHDMA;
5652         } else {
5653                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5654                 if (status) {
5655                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5656                         goto free_netdev;
5657                 }
5658         }
5659
5660         status = pci_enable_pcie_error_reporting(pdev);
5661         if (!status)
5662                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5663
5664         status = be_map_pci_bars(adapter);
5665         if (status)
5666                 goto free_netdev;
5667
5668         status = be_drv_init(adapter);
5669         if (status)
5670                 goto unmap_bars;
5671
5672         status = be_setup(adapter);
5673         if (status)
5674                 goto drv_cleanup;
5675
5676         be_netdev_init(netdev);
5677         status = register_netdev(netdev);
5678         if (status != 0)
5679                 goto unsetup;
5680
5681         be_roce_dev_add(adapter);
5682
5683         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5684
5685         /* On-die temperature is not supported for VFs. */
5686         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5687                 adapter->hwmon_info.hwmon_dev =
5688                         devm_hwmon_device_register_with_groups(&pdev->dev,
5689                                                                DRV_NAME,
5690                                                                adapter,
5691                                                                be_hwmon_groups);
5692                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5693         }
5694
5695         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5696                  func_name(adapter), mc_name(adapter), adapter->port_name);
5697
5698         return 0;
5699
5700 unsetup:
5701         be_clear(adapter);
5702 drv_cleanup:
5703         be_drv_cleanup(adapter);
5704 unmap_bars:
5705         be_unmap_pci_bars(adapter);
5706 free_netdev:
5707         free_netdev(netdev);
5708 rel_reg:
5709         pci_release_regions(pdev);
5710 disable_dev:
5711         pci_disable_device(pdev);
5712 do_none:
5713         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5714         return status;
5715 }
5716
5717 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5718 {
5719         struct be_adapter *adapter = pci_get_drvdata(pdev);
5720
5721         be_intr_set(adapter, false);
5722         be_cancel_err_detection(adapter);
5723
5724         be_cleanup(adapter);
5725
5726         pci_save_state(pdev);
5727         pci_disable_device(pdev);
5728         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5729         return 0;
5730 }
5731
5732 static int be_pci_resume(struct pci_dev *pdev)
5733 {
5734         struct be_adapter *adapter = pci_get_drvdata(pdev);
5735         int status = 0;
5736
5737         status = pci_enable_device(pdev);
5738         if (status)
5739                 return status;
5740
5741         pci_restore_state(pdev);
5742
5743         status = be_resume(adapter);
5744         if (status)
5745                 return status;
5746
5747         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5748
5749         return 0;
5750 }
5751
5752 /*
5753  * An FLR will stop BE from DMAing any data.
5754  */
5755 static void be_shutdown(struct pci_dev *pdev)
5756 {
5757         struct be_adapter *adapter = pci_get_drvdata(pdev);
5758
5759         if (!adapter)
5760                 return;
5761
5762         be_roce_dev_shutdown(adapter);
5763         cancel_delayed_work_sync(&adapter->work);
5764         be_cancel_err_detection(adapter);
5765
5766         netif_device_detach(adapter->netdev);
5767
5768         be_cmd_reset_function(adapter);
5769
5770         pci_disable_device(pdev);
5771 }
5772
static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
                                            pci_channel_state_t state)
{
        struct be_adapter *adapter = pci_get_drvdata(pdev);

        dev_err(&adapter->pdev->dev, "EEH error detected\n");

        be_roce_dev_remove(adapter);

        if (!be_check_error(adapter, BE_ERROR_EEH)) {
                be_set_error(adapter, BE_ERROR_EEH);

                be_cancel_err_detection(adapter);

                be_cleanup(adapter);
        }

        if (state == pci_channel_io_perm_failure)
                return PCI_ERS_RESULT_DISCONNECT;

        pci_disable_device(pdev);

        /* The error may cause the FW to trigger a flash debug dump.
         * Resetting the card while the flash dump is in progress can
         * prevent it from recovering, so wait for the dump to finish.
         * Only the first function needs to wait, as one wait per
         * adapter is sufficient.
         */
        if (pdev->devfn == 0)
                ssleep(30);

        return PCI_ERS_RESULT_NEED_RESET;
}

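/* PCI error recovery: slot_reset callback. Re-enables and restores the PCI
 * function after the slot reset and waits for the FW to become ready before
 * declaring the device recovered.
 */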
static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
{
        struct be_adapter *adapter = pci_get_drvdata(pdev);
        int status;

        dev_info(&adapter->pdev->dev, "EEH reset\n");

        status = pci_enable_device(pdev);
        if (status)
                return PCI_ERS_RESULT_DISCONNECT;

        pci_set_master(pdev);
        pci_restore_state(pdev);

        /* Check if card is ok and fw is ready */
        dev_info(&adapter->pdev->dev,
                 "Waiting for FW to be ready after EEH reset\n");
        status = be_fw_wait_ready(adapter);
        if (status)
                return PCI_ERS_RESULT_DISCONNECT;

        pci_cleanup_aer_uncorrect_error_status(pdev);
        be_clear_error(adapter, BE_CLEAR_ALL);
        return PCI_ERS_RESULT_RECOVERED;
}

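/* PCI error recovery: resume callback. Re-initializes the adapter via
 * be_resume(), re-registers the RoCE device and restarts error detection.
 */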
static void be_eeh_resume(struct pci_dev *pdev)
{
        int status = 0;
        struct be_adapter *adapter = pci_get_drvdata(pdev);

        dev_info(&adapter->pdev->dev, "EEH resume\n");

        pci_save_state(pdev);

        status = be_resume(adapter);
        if (status)
                goto err;

        be_roce_dev_add(adapter);

        be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
        return;
err:
        dev_err(&adapter->pdev->dev, "EEH resume failed\n");
}

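/* pci_driver.sriov_configure callback, invoked when the PF's sriov_numvfs
 * sysfs attribute is written, for example (device address is illustrative):
 *   echo 4 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs   (enable 4 VFs)
 *   echo 0 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs   (disable VFs)
 * Returns the number of VFs enabled, or a negative errno on failure.
 */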
static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
        struct be_adapter *adapter = pci_get_drvdata(pdev);
        struct be_resources vft_res = {0};
        int status;

        if (!num_vfs)
                be_vf_clear(adapter);

        adapter->num_vfs = num_vfs;

        if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
                dev_warn(&pdev->dev,
                         "Cannot disable VFs while they are assigned\n");
                return -EBUSY;
        }

        /* When the HW is in an SR-IOV capable configuration, the PF-pool
         * resources are distributed equally across the maximum number of
         * VFs. The user may request that only a subset of the max VFs be
         * enabled. Based on num_vfs, redistribute the resources across
         * num_vfs so that each VF gets a larger share of the resources.
         * This facility is not available in BE3 FW.
         * On the Lancer chip this redistribution is done by the FW.
         */
        if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
                be_calculate_vf_res(adapter, adapter->num_vfs,
                                    &vft_res);
                status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
                                                 adapter->num_vfs, &vft_res);
                if (status)
                        dev_err(&pdev->dev,
                                "Failed to optimize SR-IOV resources\n");
        }

        status = be_get_resources(adapter);
        if (status)
                return be_cmd_status(status);

        /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
        rtnl_lock();
        status = be_update_queues(adapter);
        rtnl_unlock();
        if (status)
                return be_cmd_status(status);

        if (adapter->num_vfs)
                status = be_vf_setup(adapter);

        if (!status)
                return adapter->num_vfs;

        return 0;
}

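/* PCI error recovery (EEH/AER) callbacks wired into the PCI core */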
static const struct pci_error_handlers be_eeh_handlers = {
        .error_detected = be_eeh_err_detected,
        .slot_reset = be_eeh_reset,
        .resume = be_eeh_resume,
};

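/* PCI driver glue: probe/remove, legacy PM hooks, shutdown, SR-IOV configure
 * and the error-recovery handlers registered with the PCI core.
 */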
static struct pci_driver be_driver = {
        .name = DRV_NAME,
        .id_table = be_dev_ids,
        .probe = be_probe,
        .remove = be_remove,
        .suspend = be_suspend,
        .resume = be_pci_resume,
        .shutdown = be_shutdown,
        .sriov_configure = be_pci_sriov_configure,
        .err_handler = &be_eeh_handlers
};

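/* Module init: validate module parameters, create the shared workqueue used
 * for deferred adapter commands and register the PCI driver.
 */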
static int __init be_init_module(void)
{
        if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
            rx_frag_size != 2048) {
                pr_warn(DRV_NAME " : Module param rx_frag_size must be 2048/4096/8192. Using 2048\n");
                rx_frag_size = 2048;
        }

        if (num_vfs > 0) {
                pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
                pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
        }

        be_wq = create_singlethread_workqueue("be_wq");
        if (!be_wq) {
                pr_warn(DRV_NAME " : workqueue creation failed\n");
                return -ENOMEM;
        }

        return pci_register_driver(&be_driver);
}
module_init(be_init_module);

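/* Module exit: unregister the PCI driver and destroy the shared workqueue */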
static void __exit be_exit_module(void)
{
        pci_unregister_driver(&be_driver);

        if (be_wq)
                destroy_workqueue(be_wq);
}
module_exit(be_exit_module);