be2net: Avoid unnecessary firmware updates of multicast list
[cascardo/linux.git] / drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 static const struct pci_device_id be_dev_ids[] = {
45         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
49         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
50         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
51         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
52         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
53         { 0 }
54 };
55 MODULE_DEVICE_TABLE(pci, be_dev_ids);
56 /* UE Status Low CSR */
57 static const char * const ue_status_low_desc[] = {
58         "CEV",
59         "CTX",
60         "DBUF",
61         "ERX",
62         "Host",
63         "MPU",
64         "NDMA",
65         "PTC ",
66         "RDMA ",
67         "RXF ",
68         "RXIPS ",
69         "RXULP0 ",
70         "RXULP1 ",
71         "RXULP2 ",
72         "TIM ",
73         "TPOST ",
74         "TPRE ",
75         "TXIPS ",
76         "TXULP0 ",
77         "TXULP1 ",
78         "UC ",
79         "WDMA ",
80         "TXULP2 ",
81         "HOST1 ",
82         "P0_OB_LINK ",
83         "P1_OB_LINK ",
84         "HOST_GPIO ",
85         "MBOX ",
86         "ERX2 ",
87         "SPARE ",
88         "JTAG ",
89         "MPU_INTPEND "
90 };
91
92 /* UE Status High CSR */
93 static const char * const ue_status_hi_desc[] = {
94         "LPCMEMHOST",
95         "MGMT_MAC",
96         "PCS0ONLINE",
97         "MPU_IRAM",
98         "PCS1ONLINE",
99         "PCTL0",
100         "PCTL1",
101         "PMEM",
102         "RR",
103         "TXPB",
104         "RXPP",
105         "XAUI",
106         "TXP",
107         "ARM",
108         "IPC",
109         "HOST2",
110         "HOST3",
111         "HOST4",
112         "HOST5",
113         "HOST6",
114         "HOST7",
115         "ECRC",
116         "Poison TLP",
117         "NETC",
118         "PERIPH",
119         "LLTXULP",
120         "D2P",
121         "RCON",
122         "LDMA",
123         "LLTXP",
124         "LLTXPB",
125         "Unknown"
126 };
127
128 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
129                                  BE_IF_FLAGS_BROADCAST | \
130                                  BE_IF_FLAGS_MULTICAST | \
131                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
132
133 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
134 {
135         struct be_dma_mem *mem = &q->dma_mem;
136
137         if (mem->va) {
138                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
139                                   mem->dma);
140                 mem->va = NULL;
141         }
142 }
143
144 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
145                           u16 len, u16 entry_size)
146 {
147         struct be_dma_mem *mem = &q->dma_mem;
148
149         memset(q, 0, sizeof(*q));
150         q->len = len;
151         q->entry_size = entry_size;
152         mem->size = len * entry_size;
153         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
154                                       GFP_KERNEL);
155         if (!mem->va)
156                 return -ENOMEM;
157         return 0;
158 }
159
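/* Enable/disable host interrupts by toggling the HOSTINTR bit in the
 * PCICFG MEMBAR interrupt-control register.
 */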
160 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
161 {
162         u32 reg, enabled;
163
164         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
165                               &reg);
166         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
167
168         if (!enabled && enable)
169                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
170         else if (enabled && !enable)
171                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
172         else
173                 return;
174
175         pci_write_config_dword(adapter->pdev,
176                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
177 }
178
179 static void be_intr_set(struct be_adapter *adapter, bool enable)
180 {
181         int status = 0;
182
183         /* On Lancer, interrupts can't be controlled via this register */
184         if (lancer_chip(adapter))
185                 return;
186
187         if (be_check_error(adapter, BE_ERROR_EEH))
188                 return;
189
190         status = be_cmd_intr_set(adapter, enable);
191         if (status)
192                 be_reg_intr_set(adapter, enable);
193 }
194
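/* Ring the RQ doorbell to notify HW of newly posted RX buffers */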
195 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
196 {
197         u32 val = 0;
198
199         if (be_check_error(adapter, BE_ERROR_HW))
200                 return;
201
202         val |= qid & DB_RQ_RING_ID_MASK;
203         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
204
205         wmb();
206         iowrite32(val, adapter->db + DB_RQ_OFFSET);
207 }
208
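/* Ring the TX doorbell to notify HW of the WRBs posted on a TX queue */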
209 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
210                           u16 posted)
211 {
212         u32 val = 0;
213
214         if (be_check_error(adapter, BE_ERROR_HW))
215                 return;
216
217         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
218         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
219
220         wmb();
221         iowrite32(val, adapter->db + txo->db_offset);
222 }
223
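/* Ring the EQ doorbell to ack processed events, optionally clearing the
 * interrupt and re-arming the EQ.
 */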
224 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
225                          bool arm, bool clear_int, u16 num_popped,
226                          u32 eq_delay_mult_enc)
227 {
228         u32 val = 0;
229
230         val |= qid & DB_EQ_RING_ID_MASK;
231         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
232
233         if (be_check_error(adapter, BE_ERROR_HW))
234                 return;
235
236         if (arm)
237                 val |= 1 << DB_EQ_REARM_SHIFT;
238         if (clear_int)
239                 val |= 1 << DB_EQ_CLR_SHIFT;
240         val |= 1 << DB_EQ_EVNT_SHIFT;
241         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
242         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
243         iowrite32(val, adapter->db + DB_EQ_OFFSET);
244 }
245
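/* Ring the CQ doorbell to ack processed completions and optionally re-arm the CQ */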
246 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
247 {
248         u32 val = 0;
249
250         val |= qid & DB_CQ_RING_ID_MASK;
251         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
252                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
253
254         if (be_check_error(adapter, BE_ERROR_HW))
255                 return;
256
257         if (arm)
258                 val |= 1 << DB_CQ_REARM_SHIFT;
259         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
260         iowrite32(val, adapter->db + DB_CQ_OFFSET);
261 }
262
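/* Set a new MAC address; when the interface is up, program it in the FW
 * and verify that it took effect.
 */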
263 static int be_mac_addr_set(struct net_device *netdev, void *p)
264 {
265         struct be_adapter *adapter = netdev_priv(netdev);
266         struct device *dev = &adapter->pdev->dev;
267         struct sockaddr *addr = p;
268         int status;
269         u8 mac[ETH_ALEN];
270         u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
271
272         if (!is_valid_ether_addr(addr->sa_data))
273                 return -EADDRNOTAVAIL;
274
275         /* Proceed further only if the user-provided MAC is different
276          * from the active MAC
277          */
278         if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
279                 return 0;
280
281         /* if device is not running, copy MAC to netdev->dev_addr */
282         if (!netif_running(netdev))
283                 goto done;
284
285         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
286          * privilege or if the PF did not provision the new MAC address.
287          * On BE3, this cmd will always fail if the VF doesn't have the
288          * FILTMGMT privilege. This failure is OK only if the PF programmed
289          * the MAC for the VF.
290          */
291         status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
292                                  adapter->if_handle, &adapter->pmac_id[0], 0);
293         if (!status) {
294                 curr_pmac_id = adapter->pmac_id[0];
295
296                 /* Delete the old programmed MAC. This call may fail if the
297                  * old MAC was already deleted by the PF driver.
298                  */
299                 if (adapter->pmac_id[0] != old_pmac_id)
300                         be_cmd_pmac_del(adapter, adapter->if_handle,
301                                         old_pmac_id, 0);
302         }
303
304         /* Decide if the new MAC is successfully activated only after
305          * querying the FW
306          */
307         status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
308                                        adapter->if_handle, true, 0);
309         if (status)
310                 goto err;
311
312         /* The MAC change did not happen, either due to lack of privilege
313          * or because the PF didn't pre-provision the MAC.
314          */
315         if (!ether_addr_equal(addr->sa_data, mac)) {
316                 status = -EPERM;
317                 goto err;
318         }
319 done:
320         ether_addr_copy(netdev->dev_addr, addr->sa_data);
321         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
322         return 0;
323 err:
324         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
325         return status;
326 }
327
328 /* BE2 supports only v0 cmd */
329 static void *hw_stats_from_cmd(struct be_adapter *adapter)
330 {
331         if (BE2_chip(adapter)) {
332                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
333
334                 return &cmd->hw_stats;
335         } else if (BE3_chip(adapter)) {
336                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
337
338                 return &cmd->hw_stats;
339         } else {
340                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
341
342                 return &cmd->hw_stats;
343         }
344 }
345
346 /* BE2 supports only v0 cmd */
347 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
348 {
349         if (BE2_chip(adapter)) {
350                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
351
352                 return &hw_stats->erx;
353         } else if (BE3_chip(adapter)) {
354                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
355
356                 return &hw_stats->erx;
357         } else {
358                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
359
360                 return &hw_stats->erx;
361         }
362 }
363
364 static void populate_be_v0_stats(struct be_adapter *adapter)
365 {
366         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
367         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
368         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
369         struct be_port_rxf_stats_v0 *port_stats =
370                                         &rxf_stats->port[adapter->port_num];
371         struct be_drv_stats *drvs = &adapter->drv_stats;
372
373         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
374         drvs->rx_pause_frames = port_stats->rx_pause_frames;
375         drvs->rx_crc_errors = port_stats->rx_crc_errors;
376         drvs->rx_control_frames = port_stats->rx_control_frames;
377         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
378         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
379         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
380         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
381         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
382         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
383         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
384         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
385         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
386         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
387         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
388         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
389         drvs->rx_dropped_header_too_small =
390                 port_stats->rx_dropped_header_too_small;
391         drvs->rx_address_filtered =
392                                         port_stats->rx_address_filtered +
393                                         port_stats->rx_vlan_filtered;
394         drvs->rx_alignment_symbol_errors =
395                 port_stats->rx_alignment_symbol_errors;
396
397         drvs->tx_pauseframes = port_stats->tx_pauseframes;
398         drvs->tx_controlframes = port_stats->tx_controlframes;
399
400         if (adapter->port_num)
401                 drvs->jabber_events = rxf_stats->port1_jabber_events;
402         else
403                 drvs->jabber_events = rxf_stats->port0_jabber_events;
404         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
405         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
406         drvs->forwarded_packets = rxf_stats->forwarded_packets;
407         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
408         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
409         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
410         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
411 }
412
413 static void populate_be_v1_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v1 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
424         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
425         drvs->rx_pause_frames = port_stats->rx_pause_frames;
426         drvs->rx_crc_errors = port_stats->rx_crc_errors;
427         drvs->rx_control_frames = port_stats->rx_control_frames;
428         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
429         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
430         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
431         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
432         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
433         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
434         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
435         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
436         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
437         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_input_fifo_overflow_drop =
441                 port_stats->rx_input_fifo_overflow_drop;
442         drvs->rx_address_filtered = port_stats->rx_address_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
449         drvs->jabber_events = port_stats->jabber_events;
450         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
451         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
452         drvs->forwarded_packets = rxf_stats->forwarded_packets;
453         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
454         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
455         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
456         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
457 }
458
459 static void populate_be_v2_stats(struct be_adapter *adapter)
460 {
461         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
462         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
463         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
464         struct be_port_rxf_stats_v2 *port_stats =
465                                         &rxf_stats->port[adapter->port_num];
466         struct be_drv_stats *drvs = &adapter->drv_stats;
467
468         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
469         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
470         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
471         drvs->rx_pause_frames = port_stats->rx_pause_frames;
472         drvs->rx_crc_errors = port_stats->rx_crc_errors;
473         drvs->rx_control_frames = port_stats->rx_control_frames;
474         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
475         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
476         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
477         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
478         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
479         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
480         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
481         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
482         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
483         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
484         drvs->rx_dropped_header_too_small =
485                 port_stats->rx_dropped_header_too_small;
486         drvs->rx_input_fifo_overflow_drop =
487                 port_stats->rx_input_fifo_overflow_drop;
488         drvs->rx_address_filtered = port_stats->rx_address_filtered;
489         drvs->rx_alignment_symbol_errors =
490                 port_stats->rx_alignment_symbol_errors;
491         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
492         drvs->tx_pauseframes = port_stats->tx_pauseframes;
493         drvs->tx_controlframes = port_stats->tx_controlframes;
494         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
495         drvs->jabber_events = port_stats->jabber_events;
496         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
497         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
498         drvs->forwarded_packets = rxf_stats->forwarded_packets;
499         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
500         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
501         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
502         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
503         if (be_roce_supported(adapter)) {
504                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
505                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
506                 drvs->rx_roce_frames = port_stats->roce_frames_received;
507                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
508                 drvs->roce_drops_payload_len =
509                         port_stats->roce_drops_payload_len;
510         }
511 }
512
513 static void populate_lancer_stats(struct be_adapter *adapter)
514 {
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
517
518         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
519         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
520         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
521         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
522         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
523         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
524         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
525         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
526         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
527         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
528         drvs->rx_dropped_tcp_length =
529                                 pport_stats->rx_dropped_invalid_tcp_length;
530         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
533         drvs->rx_dropped_header_too_small =
534                                 pport_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
536         drvs->rx_address_filtered =
537                                         pport_stats->rx_address_filtered +
538                                         pport_stats->rx_vlan_filtered;
539         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
540         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
541         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
542         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
543         drvs->jabber_events = pport_stats->rx_jabbers;
544         drvs->forwarded_packets = pport_stats->num_forwards_lo;
545         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
546         drvs->rx_drops_too_many_frags =
547                                 pport_stats->rx_drops_too_many_frags_lo;
548 }
549
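/* Accumulate a 16-bit HW counter (which wraps at 65535) into a 32-bit driver counter */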
550 static void accumulate_16bit_val(u32 *acc, u16 val)
551 {
552 #define lo(x)                   (x & 0xFFFF)
553 #define hi(x)                   (x & 0xFFFF0000)
554         bool wrapped = val < lo(*acc);
555         u32 newacc = hi(*acc) + val;
556
557         if (wrapped)
558                 newacc += 65536;
559         ACCESS_ONCE(*acc) = newacc;
560 }
561
562 static void populate_erx_stats(struct be_adapter *adapter,
563                                struct be_rx_obj *rxo, u32 erx_stat)
564 {
565         if (!BEx_chip(adapter))
566                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
567         else
568                 /* the erx HW counter below can actually wrap around after
569                  * 65535; the driver accumulates it into a 32-bit value
570                  */
571                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
572                                      (u16)erx_stat);
573 }
574
575 void be_parse_stats(struct be_adapter *adapter)
576 {
577         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
578         struct be_rx_obj *rxo;
579         int i;
580         u32 erx_stat;
581
582         if (lancer_chip(adapter)) {
583                 populate_lancer_stats(adapter);
584         } else {
585                 if (BE2_chip(adapter))
586                         populate_be_v0_stats(adapter);
587                 else if (BE3_chip(adapter))
588                         /* for BE3 */
589                         populate_be_v1_stats(adapter);
590                 else
591                         populate_be_v2_stats(adapter);
592
593                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
594                 for_all_rx_queues(adapter, rxo, i) {
595                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
596                         populate_erx_stats(adapter, rxo, erx_stat);
597                 }
598         }
599 }
600
601 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
602                                                 struct rtnl_link_stats64 *stats)
603 {
604         struct be_adapter *adapter = netdev_priv(netdev);
605         struct be_drv_stats *drvs = &adapter->drv_stats;
606         struct be_rx_obj *rxo;
607         struct be_tx_obj *txo;
608         u64 pkts, bytes;
609         unsigned int start;
610         int i;
611
612         for_all_rx_queues(adapter, rxo, i) {
613                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
614
615                 do {
616                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
617                         pkts = rx_stats(rxo)->rx_pkts;
618                         bytes = rx_stats(rxo)->rx_bytes;
619                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
620                 stats->rx_packets += pkts;
621                 stats->rx_bytes += bytes;
622                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
623                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
624                                         rx_stats(rxo)->rx_drops_no_frags;
625         }
626
627         for_all_tx_queues(adapter, txo, i) {
628                 const struct be_tx_stats *tx_stats = tx_stats(txo);
629
630                 do {
631                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
632                         pkts = tx_stats(txo)->tx_pkts;
633                         bytes = tx_stats(txo)->tx_bytes;
634                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
635                 stats->tx_packets += pkts;
636                 stats->tx_bytes += bytes;
637         }
638
639         /* bad pkts received */
640         stats->rx_errors = drvs->rx_crc_errors +
641                 drvs->rx_alignment_symbol_errors +
642                 drvs->rx_in_range_errors +
643                 drvs->rx_out_range_errors +
644                 drvs->rx_frame_too_long +
645                 drvs->rx_dropped_too_small +
646                 drvs->rx_dropped_too_short +
647                 drvs->rx_dropped_header_too_small +
648                 drvs->rx_dropped_tcp_length +
649                 drvs->rx_dropped_runt;
650
651         /* detailed rx errors */
652         stats->rx_length_errors = drvs->rx_in_range_errors +
653                 drvs->rx_out_range_errors +
654                 drvs->rx_frame_too_long;
655
656         stats->rx_crc_errors = drvs->rx_crc_errors;
657
658         /* frame alignment errors */
659         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
660
661         /* receiver fifo overrun */
662         /* drops_no_pbuf is not per i/f, it's per BE card */
663         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
664                                 drvs->rx_input_fifo_overflow_drop +
665                                 drvs->rx_drops_no_pbuf;
666         return stats;
667 }
668
669 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
670 {
671         struct net_device *netdev = adapter->netdev;
672
673         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
674                 netif_carrier_off(netdev);
675                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
676         }
677
678         if (link_status)
679                 netif_carrier_on(netdev);
680         else
681                 netif_carrier_off(netdev);
682
683         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
684 }
685
686 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
687 {
688         struct be_tx_stats *stats = tx_stats(txo);
689         u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
690
691         u64_stats_update_begin(&stats->sync);
692         stats->tx_reqs++;
693         stats->tx_bytes += skb->len;
694         stats->tx_pkts += tx_pkts;
695         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
696                 stats->tx_vxlan_offload_pkts += tx_pkts;
697         u64_stats_update_end(&stats->sync);
698 }
699
700 /* Returns number of WRBs needed for the skb */
701 static u32 skb_wrb_cnt(struct sk_buff *skb)
702 {
703         /* +1 for the header wrb */
704         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
705 }
706
707 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
708 {
709         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
710         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
711         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
712         wrb->rsvd0 = 0;
713 }
714
715 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
716  * to avoid the swap and shift/mask operations in wrb_fill().
717  */
718 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
719 {
720         wrb->frag_pa_hi = 0;
721         wrb->frag_pa_lo = 0;
722         wrb->frag_len = 0;
723         wrb->rsvd0 = 0;
724 }
725
726 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
727                                      struct sk_buff *skb)
728 {
729         u8 vlan_prio;
730         u16 vlan_tag;
731
732         vlan_tag = skb_vlan_tag_get(skb);
733         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
734         /* If vlan priority provided by OS is NOT in available bmap */
735         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
736                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
737                                 adapter->recommended_prio_bits;
738
739         return vlan_tag;
740 }
741
742 /* Used only for IP tunnel packets */
743 static u16 skb_inner_ip_proto(struct sk_buff *skb)
744 {
745         return (inner_ip_hdr(skb)->version == 4) ?
746                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
747 }
748
749 static u16 skb_ip_proto(struct sk_buff *skb)
750 {
751         return (ip_hdr(skb)->version == 4) ?
752                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
753 }
754
755 static inline bool be_is_txq_full(struct be_tx_obj *txo)
756 {
757         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
758 }
759
760 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
761 {
762         return atomic_read(&txo->q.used) < txo->q.len / 2;
763 }
764
765 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
766 {
767         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
768 }
769
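/* Translate the skb's offload state (GSO, checksum, VLAN) into TX WRB feature bits */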
770 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
771                                        struct sk_buff *skb,
772                                        struct be_wrb_params *wrb_params)
773 {
774         u16 proto;
775
776         if (skb_is_gso(skb)) {
777                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
778                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
779                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
780                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
781         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
782                 if (skb->encapsulation) {
783                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
784                         proto = skb_inner_ip_proto(skb);
785                 } else {
786                         proto = skb_ip_proto(skb);
787                 }
788                 if (proto == IPPROTO_TCP)
789                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
790                 else if (proto == IPPROTO_UDP)
791                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
792         }
793
794         if (skb_vlan_tag_present(skb)) {
795                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
796                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
797         }
798
799         BE_WRB_F_SET(wrb_params->features, CRC, 1);
800 }
801
802 static void wrb_fill_hdr(struct be_adapter *adapter,
803                          struct be_eth_hdr_wrb *hdr,
804                          struct be_wrb_params *wrb_params,
805                          struct sk_buff *skb)
806 {
807         memset(hdr, 0, sizeof(*hdr));
808
809         SET_TX_WRB_HDR_BITS(crc, hdr,
810                             BE_WRB_F_GET(wrb_params->features, CRC));
811         SET_TX_WRB_HDR_BITS(ipcs, hdr,
812                             BE_WRB_F_GET(wrb_params->features, IPCS));
813         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
814                             BE_WRB_F_GET(wrb_params->features, TCPCS));
815         SET_TX_WRB_HDR_BITS(udpcs, hdr,
816                             BE_WRB_F_GET(wrb_params->features, UDPCS));
817
818         SET_TX_WRB_HDR_BITS(lso, hdr,
819                             BE_WRB_F_GET(wrb_params->features, LSO));
820         SET_TX_WRB_HDR_BITS(lso6, hdr,
821                             BE_WRB_F_GET(wrb_params->features, LSO6));
822         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
823
824         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
825          * hack is not needed, the evt bit is set while ringing DB.
826          */
827         SET_TX_WRB_HDR_BITS(event, hdr,
828                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
829         SET_TX_WRB_HDR_BITS(vlan, hdr,
830                             BE_WRB_F_GET(wrb_params->features, VLAN));
831         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
832
833         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
834         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
835         SET_TX_WRB_HDR_BITS(mgmt, hdr,
836                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
837 }
838
839 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
840                           bool unmap_single)
841 {
842         dma_addr_t dma;
843         u32 frag_len = le32_to_cpu(wrb->frag_len);
844
845
846         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
847                 (u64)le32_to_cpu(wrb->frag_pa_lo);
848         if (frag_len) {
849                 if (unmap_single)
850                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
851                 else
852                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
853         }
854 }
855
856 /* Grab a WRB header for xmit */
857 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
858 {
859         u32 head = txo->q.head;
860
861         queue_head_inc(&txo->q);
862         return head;
863 }
864
865 /* Set up the WRB header for xmit */
866 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
867                                 struct be_tx_obj *txo,
868                                 struct be_wrb_params *wrb_params,
869                                 struct sk_buff *skb, u16 head)
870 {
871         u32 num_frags = skb_wrb_cnt(skb);
872         struct be_queue_info *txq = &txo->q;
873         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
874
875         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
876         be_dws_cpu_to_le(hdr, sizeof(*hdr));
877
878         BUG_ON(txo->sent_skb_list[head]);
879         txo->sent_skb_list[head] = skb;
880         txo->last_req_hdr = head;
881         atomic_add(num_frags, &txq->used);
882         txo->last_req_wrb_cnt = num_frags;
883         txo->pend_wrb_cnt += num_frags;
884 }
885
886 /* Setup a WRB fragment (buffer descriptor) for xmit */
887 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
888                                  int len)
889 {
890         struct be_eth_wrb *wrb;
891         struct be_queue_info *txq = &txo->q;
892
893         wrb = queue_head_node(txq);
894         wrb_fill(wrb, busaddr, len);
895         queue_head_inc(txq);
896 }
897
898 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
899  * was invoked. The producer index is restored to the previous packet and the
900  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
901  */
902 static void be_xmit_restore(struct be_adapter *adapter,
903                             struct be_tx_obj *txo, u32 head, bool map_single,
904                             u32 copied)
905 {
906         struct device *dev;
907         struct be_eth_wrb *wrb;
908         struct be_queue_info *txq = &txo->q;
909
910         dev = &adapter->pdev->dev;
911         txq->head = head;
912
913         /* skip the first wrb (hdr); it's not mapped */
914         queue_head_inc(txq);
915         while (copied) {
916                 wrb = queue_head_node(txq);
917                 unmap_tx_frag(dev, wrb, map_single);
918                 map_single = false;
919                 copied -= le32_to_cpu(wrb->frag_len);
920                 queue_head_inc(txq);
921         }
922
923         txq->head = head;
924 }
925
926 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
927  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
928  * of WRBs used up by the packet.
929  */
930 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
931                            struct sk_buff *skb,
932                            struct be_wrb_params *wrb_params)
933 {
934         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
935         struct device *dev = &adapter->pdev->dev;
936         struct be_queue_info *txq = &txo->q;
937         bool map_single = false;
938         u32 head = txq->head;
939         dma_addr_t busaddr;
940         int len;
941
942         head = be_tx_get_wrb_hdr(txo);
943
944         if (skb->len > skb->data_len) {
945                 len = skb_headlen(skb);
946
947                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
948                 if (dma_mapping_error(dev, busaddr))
949                         goto dma_err;
950                 map_single = true;
951                 be_tx_setup_wrb_frag(txo, busaddr, len);
952                 copied += len;
953         }
954
955         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
956                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
957                 len = skb_frag_size(frag);
958
959                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
960                 if (dma_mapping_error(dev, busaddr))
961                         goto dma_err;
962                 be_tx_setup_wrb_frag(txo, busaddr, len);
963                 copied += len;
964         }
965
966         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
967
968         be_tx_stats_update(txo, skb);
969         return wrb_cnt;
970
971 dma_err:
972         adapter->drv_stats.dma_map_errors++;
973         be_xmit_restore(adapter, txo, head, map_single, copied);
974         return 0;
975 }
976
977 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
978 {
979         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
980 }
981
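/* Insert the VLAN tag (and the outer QnQ tag, if configured) into the packet data itself */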
982 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
983                                              struct sk_buff *skb,
984                                              struct be_wrb_params
985                                              *wrb_params)
986 {
987         u16 vlan_tag = 0;
988
989         skb = skb_share_check(skb, GFP_ATOMIC);
990         if (unlikely(!skb))
991                 return skb;
992
993         if (skb_vlan_tag_present(skb))
994                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
995
996         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
997                 if (!vlan_tag)
998                         vlan_tag = adapter->pvid;
999                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1000                  * to skip VLAN insertion
1001                  */
1002                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1003         }
1004
1005         if (vlan_tag) {
1006                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1007                                                 vlan_tag);
1008                 if (unlikely(!skb))
1009                         return skb;
1010                 skb->vlan_tci = 0;
1011         }
1012
1013         /* Insert the outer VLAN, if any */
1014         if (adapter->qnq_vid) {
1015                 vlan_tag = adapter->qnq_vid;
1016                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1017                                                 vlan_tag);
1018                 if (unlikely(!skb))
1019                         return skb;
1020                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1021         }
1022
1023         return skb;
1024 }
1025
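/* Check for an ipv6 pkt carrying an extension header whose hdrlen field is 0xff */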
1026 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1027 {
1028         struct ethhdr *eh = (struct ethhdr *)skb->data;
1029         u16 offset = ETH_HLEN;
1030
1031         if (eh->h_proto == htons(ETH_P_IPV6)) {
1032                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1033
1034                 offset += sizeof(struct ipv6hdr);
1035                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1036                     ip6h->nexthdr != NEXTHDR_UDP) {
1037                         struct ipv6_opt_hdr *ehdr =
1038                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1039
1040                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1041                         if (ehdr->hdrlen == 0xff)
1042                                 return true;
1043                 }
1044         }
1045         return false;
1046 }
1047
1048 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1049 {
1050         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1051 }
1052
1053 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1054 {
1055         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1056 }
1057
1058 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1059                                                   struct sk_buff *skb,
1060                                                   struct be_wrb_params
1061                                                   *wrb_params)
1062 {
1063         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1064         unsigned int eth_hdr_len;
1065         struct iphdr *ip;
1066
1067         /* For padded packets, BE HW modifies tot_len field in IP header
1068          * incorrectly when VLAN tag is inserted by HW.
1069          * For padded packets, Lancer computes incorrect checksum.
1070          */
1071         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1072                                                 VLAN_ETH_HLEN : ETH_HLEN;
1073         if (skb->len <= 60 &&
1074             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1075             is_ipv4_pkt(skb)) {
1076                 ip = (struct iphdr *)ip_hdr(skb);
1077                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1078         }
1079
1080         /* If vlan tag is already inlined in the packet, skip HW VLAN
1081          * tagging in pvid-tagging mode
1082          */
1083         if (be_pvid_tagging_enabled(adapter) &&
1084             veh->h_vlan_proto == htons(ETH_P_8021Q))
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086
1087         /* HW has a bug wherein it will calculate CSUM for VLAN
1088          * pkts even though CSUM offload is disabled.
1089          * Manually insert VLAN in pkt.
1090          */
1091         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1092             skb_vlan_tag_present(skb)) {
1093                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1094                 if (unlikely(!skb))
1095                         goto err;
1096         }
1097
1098         /* HW may lock up when VLAN HW tagging is requested on
1099          * certain ipv6 packets. Drop such pkts if the HW workaround to
1100          * skip HW tagging is not enabled by FW.
1101          */
1102         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1103                      (adapter->pvid || adapter->qnq_vid) &&
1104                      !qnq_async_evt_rcvd(adapter)))
1105                 goto tx_drop;
1106
1107         /* Manual VLAN tag insertion to prevent an ASIC lockup
1108          * that occurs when the ASIC inserts a VLAN tag into
1109          * certain ipv6 packets. Insert VLAN tags in the driver,
1110          * and set the event, completion, vlan bits accordingly
1111          * in the Tx WRB.
1112          */
1113         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1114             be_vlan_tag_tx_chk(adapter, skb)) {
1115                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1116                 if (unlikely(!skb))
1117                         goto err;
1118         }
1119
1120         return skb;
1121 tx_drop:
1122         dev_kfree_skb_any(skb);
1123 err:
1124         return NULL;
1125 }
1126
1127 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1128                                            struct sk_buff *skb,
1129                                            struct be_wrb_params *wrb_params)
1130 {
1131         int err;
1132
1133         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1134          * packets that are 32 bytes or less may cause a transmit stall
1135          * on that port. The workaround is to pad such packets
1136          * (len <= 32 bytes) to a minimum length of 36 bytes.
1137          */
1138         if (skb->len <= 32) {
1139                 if (skb_put_padto(skb, 36))
1140                         return NULL;
1141         }
1142
1143         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1144                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1145                 if (!skb)
1146                         return NULL;
1147         }
1148
1149         /* The stack can send us skbs with length greater than
1150          * what the HW can handle. Trim the extra bytes.
1151          */
1152         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1153         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1154         WARN_ON(err);
1155
1156         return skb;
1157 }
1158
1159 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1160 {
1161         struct be_queue_info *txq = &txo->q;
1162         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1163
1164         /* Mark the last request eventable if it hasn't been marked already */
1165         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1166                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1167
1168         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1169         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1170                 wrb_fill_dummy(queue_head_node(txq));
1171                 queue_head_inc(txq);
1172                 atomic_inc(&txq->used);
1173                 txo->pend_wrb_cnt++;
1174                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1175                                            TX_HDR_WRB_NUM_SHIFT);
1176                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1177                                           TX_HDR_WRB_NUM_SHIFT);
1178         }
1179         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1180         txo->pend_wrb_cnt = 0;
1181 }
1182
1183 /* OS2BMC related */
1184
1185 #define DHCP_CLIENT_PORT        68
1186 #define DHCP_SERVER_PORT        67
1187 #define NET_BIOS_PORT1          137
1188 #define NET_BIOS_PORT2          138
1189 #define DHCPV6_RAS_PORT         547
1190
1191 #define is_mc_allowed_on_bmc(adapter, eh)       \
1192         (!is_multicast_filt_enabled(adapter) && \
1193          is_multicast_ether_addr(eh->h_dest) && \
1194          !is_broadcast_ether_addr(eh->h_dest))
1195
1196 #define is_bc_allowed_on_bmc(adapter, eh)       \
1197         (!is_broadcast_filt_enabled(adapter) && \
1198          is_broadcast_ether_addr(eh->h_dest))
1199
1200 #define is_arp_allowed_on_bmc(adapter, skb)     \
1201         (is_arp(skb) && is_arp_filt_enabled(adapter))
1202
1203 #define is_broadcast_packet(eh, adapter)        \
1204                 (is_multicast_ether_addr(eh->h_dest) && \
1205                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1206
1207 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1208
1209 #define is_arp_filt_enabled(adapter)    \
1210                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1211
1212 #define is_dhcp_client_filt_enabled(adapter)    \
1213                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1214
1215 #define is_dhcp_srvr_filt_enabled(adapter)      \
1216                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1217
1218 #define is_nbios_filt_enabled(adapter)  \
1219                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1220
1221 #define is_ipv6_na_filt_enabled(adapter)        \
1222                 (adapter->bmc_filt_mask &       \
1223                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1224
1225 #define is_ipv6_ra_filt_enabled(adapter)        \
1226                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1227
1228 #define is_ipv6_ras_filt_enabled(adapter)       \
1229                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1230
1231 #define is_broadcast_filt_enabled(adapter)      \
1232                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1233
1234 #define is_multicast_filt_enabled(adapter)      \
1235                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1236
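/* Decide, based on the BMC filter mask, whether a copy of this TX pkt
 * must also be sent to the BMC.
 */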
1237 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1238                                struct sk_buff **skb)
1239 {
1240         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1241         bool os2bmc = false;
1242
1243         if (!be_is_os2bmc_enabled(adapter))
1244                 goto done;
1245
1246         if (!is_multicast_ether_addr(eh->h_dest))
1247                 goto done;
1248
1249         if (is_mc_allowed_on_bmc(adapter, eh) ||
1250             is_bc_allowed_on_bmc(adapter, eh) ||
1251             is_arp_allowed_on_bmc(adapter, (*skb))) {
1252                 os2bmc = true;
1253                 goto done;
1254         }
1255
1256         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1257                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1258                 u8 nexthdr = hdr->nexthdr;
1259
1260                 if (nexthdr == IPPROTO_ICMPV6) {
1261                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1262
1263                         switch (icmp6->icmp6_type) {
1264                         case NDISC_ROUTER_ADVERTISEMENT:
1265                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1266                                 goto done;
1267                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1268                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1269                                 goto done;
1270                         default:
1271                                 break;
1272                         }
1273                 }
1274         }
1275
1276         if (is_udp_pkt((*skb))) {
1277                 struct udphdr *udp = udp_hdr((*skb));
1278
1279                 switch (ntohs(udp->dest)) {
1280                 case DHCP_CLIENT_PORT:
1281                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1282                         goto done;
1283                 case DHCP_SERVER_PORT:
1284                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1285                         goto done;
1286                 case NET_BIOS_PORT1:
1287                 case NET_BIOS_PORT2:
1288                         os2bmc = is_nbios_filt_enabled(adapter);
1289                         goto done;
1290                 case DHCPV6_RAS_PORT:
1291                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1292                         goto done;
1293                 default:
1294                         break;
1295                 }
1296         }
1297 done:
1298         /* For vlan packets that are destined to the BMC, the asic
1299          * expects the vlan tag to be inline in the packet.
1300          */
1301         if (os2bmc)
1302                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1303
1304         return os2bmc;
1305 }
1306
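/* Transmit entry point: apply HW workarounds, enqueue the pkt and ring the TX doorbell */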
1307 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1308 {
1309         struct be_adapter *adapter = netdev_priv(netdev);
1310         u16 q_idx = skb_get_queue_mapping(skb);
1311         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1312         struct be_wrb_params wrb_params = { 0 };
1313         bool flush = !skb->xmit_more;
1314         u16 wrb_cnt;
1315
1316         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1317         if (unlikely(!skb))
1318                 goto drop;
1319
1320         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1321
1322         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1323         if (unlikely(!wrb_cnt)) {
1324                 dev_kfree_skb_any(skb);
1325                 goto drop;
1326         }
1327
1328         /* if os2bmc is enabled and if the pkt is destined to bmc,
1329          * enqueue the pkt a 2nd time with mgmt bit set.
1330          */
1331         if (be_send_pkt_to_bmc(adapter, &skb)) {
1332                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1333                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1334                 if (unlikely(!wrb_cnt))
1335                         goto drop;
1336                 else
1337                         skb_get(skb);
1338         }
1339
1340         if (be_is_txq_full(txo)) {
1341                 netif_stop_subqueue(netdev, q_idx);
1342                 tx_stats(txo)->tx_stops++;
1343         }
1344
1345         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1346                 be_xmit_flush(adapter, txo);
1347
1348         return NETDEV_TX_OK;
1349 drop:
1350         tx_stats(txo)->tx_drv_drops++;
1351         /* Flush the already enqueued tx requests */
1352         if (flush && txo->pend_wrb_cnt)
1353                 be_xmit_flush(adapter, txo);
1354
1355         return NETDEV_TX_OK;
1356 }
1357
1358 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1359 {
1360         struct be_adapter *adapter = netdev_priv(netdev);
1361         struct device *dev = &adapter->pdev->dev;
1362
1363         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1364                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1365                          BE_MIN_MTU, BE_MAX_MTU);
1366                 return -EINVAL;
1367         }
1368
1369         dev_info(dev, "MTU changed from %d to %d bytes\n",
1370                  netdev->mtu, new_mtu);
1371         netdev->mtu = new_mtu;
1372         return 0;
1373 }
1374
1375 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1376 {
1377         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1378                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1379 }
1380
1381 static int be_set_vlan_promisc(struct be_adapter *adapter)
1382 {
1383         struct device *dev = &adapter->pdev->dev;
1384         int status;
1385
1386         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1387                 return 0;
1388
1389         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1390         if (!status) {
1391                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1392                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1393         } else {
1394                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1395         }
1396         return status;
1397 }
1398
1399 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1400 {
1401         struct device *dev = &adapter->pdev->dev;
1402         int status;
1403
1404         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1405         if (!status) {
1406                 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1407                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1408         }
1409         return status;
1410 }
1411
1412 /*
1413  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1414  * If the user configures more, place BE in vlan promiscuous mode.
1415  */
1416 static int be_vid_config(struct be_adapter *adapter)
1417 {
1418         struct device *dev = &adapter->pdev->dev;
1419         u16 vids[BE_NUM_VLANS_SUPPORTED];
1420         u16 num = 0, i = 0;
1421         int status = 0;
1422
1423         /* No need to change the VLAN state if the I/F is in promiscuous */
1424         if (adapter->netdev->flags & IFF_PROMISC)
1425                 return 0;
1426
1427         if (adapter->vlans_added > be_max_vlans(adapter))
1428                 return be_set_vlan_promisc(adapter);
1429
1430         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1431                 status = be_clear_vlan_promisc(adapter);
1432                 if (status)
1433                         return status;
1434         }
1435         /* Construct VLAN Table to give to HW */
1436         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1437                 vids[num++] = cpu_to_le16(i);
1438
1439         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1440         if (status) {
1441                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1442                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1443                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1444                     addl_status(status) ==
1445                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1446                         return be_set_vlan_promisc(adapter);
1447         }
1448         return status;
1449 }
1450
1451 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1452 {
1453         struct be_adapter *adapter = netdev_priv(netdev);
1454         int status = 0;
1455
1456         /* Packets with VID 0 are always received by Lancer by default */
1457         if (lancer_chip(adapter) && vid == 0)
1458                 return status;
1459
1460         if (test_bit(vid, adapter->vids))
1461                 return status;
1462
1463         set_bit(vid, adapter->vids);
1464         adapter->vlans_added++;
1465
1466         return be_vid_config(adapter);
1467 }
1468
1469 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1470 {
1471         struct be_adapter *adapter = netdev_priv(netdev);
1472
1473         /* Packets with VID 0 are always received by Lancer by default */
1474         if (lancer_chip(adapter) && vid == 0)
1475                 return 0;
1476
1477         if (!test_bit(vid, adapter->vids))
1478                 return 0;
1479
1480         clear_bit(vid, adapter->vids);
1481         adapter->vlans_added--;
1482
1483         return be_vid_config(adapter);
1484 }
1485
1486 static void be_set_all_promisc(struct be_adapter *adapter)
1487 {
1488         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1489         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1490 }
1491
1492 static void be_set_mc_promisc(struct be_adapter *adapter)
1493 {
1494         int status;
1495
1496         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1497                 return;
1498
1499         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1500         if (!status)
1501                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1502 }
1503
1504 static void be_set_uc_promisc(struct be_adapter *adapter)
1505 {
1506         int status;
1507
1508         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1509                 return;
1510
1511         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1512         if (!status)
1513                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1514 }
1515
1516 static void be_clear_uc_promisc(struct be_adapter *adapter)
1517 {
1518         int status;
1519
1520         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1521                 return;
1522
1523         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1524         if (!status)
1525                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1526 }
1527
1528 /* The two functions below are the callback args for __dev_mc_sync/dev_uc_sync().
1529  * We use a single callback function for both sync and unsync. We don't really
1530  * add/remove addresses through this callback; we only use it to detect changes
1531  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1532  */
1533 static int be_uc_list_update(struct net_device *netdev,
1534                              const unsigned char *addr)
1535 {
1536         struct be_adapter *adapter = netdev_priv(netdev);
1537
1538         adapter->update_uc_list = true;
1539         return 0;
1540 }
1541
1542 static int be_mc_list_update(struct net_device *netdev,
1543                              const unsigned char *addr)
1544 {
1545         struct be_adapter *adapter = netdev_priv(netdev);
1546
1547         adapter->update_mc_list = true;
1548         return 0;
1549 }
1550
1551 static void be_set_mc_list(struct be_adapter *adapter)
1552 {
1553         struct net_device *netdev = adapter->netdev;
1554         bool mc_promisc = false;
1555         int status;
1556
1557         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1558
1559         if (netdev->flags & IFF_PROMISC) {
1560                 adapter->update_mc_list = false;
1561         } else if (netdev->flags & IFF_ALLMULTI ||
1562                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1563                 /* Enable multicast promisc if the number of addresses
1564                  * configured exceeds what we support
1565                  */
1566                 mc_promisc = true;
1567                 adapter->update_mc_list = false;
1568         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1569                 /* Update mc-list unconditionally if the iface was previously
1570                  * in mc-promisc mode and now is out of that mode.
1571                  */
1572                 adapter->update_mc_list = true;
1573         }
1574
1575         if (mc_promisc) {
1576                 be_set_mc_promisc(adapter);
1577         } else if (adapter->update_mc_list) {
1578                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1579                 if (!status)
1580                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1581                 else
1582                         be_set_mc_promisc(adapter);
1583
1584                 adapter->update_mc_list = false;
1585         }
1586 }
1587
1588 static void be_clear_mc_list(struct be_adapter *adapter)
1589 {
1590         struct net_device *netdev = adapter->netdev;
1591
1592         __dev_mc_unsync(netdev, NULL);
1593         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1594 }
1595
1596 static void be_set_uc_list(struct be_adapter *adapter)
1597 {
1598         struct net_device *netdev = adapter->netdev;
1599         struct netdev_hw_addr *ha;
1600         bool uc_promisc = false;
1601         int i = 1; /* First slot is claimed by the Primary MAC */
1602
1603         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1604
1605         if (netdev->flags & IFF_PROMISC) {
1606                 adapter->update_uc_list = false;
1607         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1608                 uc_promisc = true;
1609                 adapter->update_uc_list = false;
1610         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1611                 /* Update uc-list unconditionally if the iface was previously
1612                  * in uc-promisc mode and now is out of that mode.
1613                  */
1614                 adapter->update_uc_list = true;
1615         }
1616
1617         if (uc_promisc) {
1618                 be_set_uc_promisc(adapter);
1619         } else if (adapter->update_uc_list) {
1620                 be_clear_uc_promisc(adapter);
1621
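                     /* Delete all previously programmed unicast MACs (slots 1
                      * onward) and then re-program the current uc-list from
                      * scratch; slot 0 always holds the primary MAC.
                      */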
1622                 for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
1623                         be_cmd_pmac_del(adapter, adapter->if_handle,
1624                                         adapter->pmac_id[i], 0);
1625
1626                 netdev_for_each_uc_addr(ha, adapter->netdev) {
1627                         adapter->uc_macs++; /* First slot is for Primary MAC */
1628                         be_cmd_pmac_add(adapter,
1629                                         (u8 *)ha->addr, adapter->if_handle,
1630                                         &adapter->pmac_id[adapter->uc_macs], 0);
1631                 }
1632                 adapter->update_uc_list = false;
1633         }
1634 }
1635
1636 static void be_clear_uc_list(struct be_adapter *adapter)
1637 {
1638         struct net_device *netdev = adapter->netdev;
1639         int i;
1640
1641         __dev_uc_unsync(netdev, NULL);
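             /* Slot 0 holds the primary MAC; remove only the additional
              * unicast MACs programmed in slots 1..uc_macs.
              */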
1642         for (i = 1; i < (adapter->uc_macs + 1); i++)
1643                 be_cmd_pmac_del(adapter, adapter->if_handle,
1644                                 adapter->pmac_id[i], 0);
1645         adapter->uc_macs = 0;
1646 }
1647
1648 static void be_set_rx_mode(struct net_device *netdev)
1649 {
1650         struct be_adapter *adapter = netdev_priv(netdev);
1651
1652         if (netdev->flags & IFF_PROMISC) {
1653                 if (!be_in_all_promisc(adapter))
1654                         be_set_all_promisc(adapter);
1655         } else if (be_in_all_promisc(adapter)) {
1656                 /* We need to re-program the vlan-list or clear
1657                  * vlan-promisc mode (if needed) when the interface
1658                  * comes out of promisc mode.
1659                  */
1660                 be_vid_config(adapter);
1661         }
1662
1663         be_set_uc_list(adapter);
1664         be_set_mc_list(adapter);
1665 }
1666
1667 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1668 {
1669         struct be_adapter *adapter = netdev_priv(netdev);
1670         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1671         int status;
1672
1673         if (!sriov_enabled(adapter))
1674                 return -EPERM;
1675
1676         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1677                 return -EINVAL;
1678
1679         /* Proceed further only if user provided MAC is different
1680          * from active MAC
1681          */
1682         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1683                 return 0;
1684
1685         if (BEx_chip(adapter)) {
1686                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1687                                 vf + 1);
1688
1689                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1690                                          &vf_cfg->pmac_id, vf + 1);
1691         } else {
1692                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1693                                         vf + 1);
1694         }
1695
1696         if (status) {
1697                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1698                         mac, vf, status);
1699                 return be_cmd_status(status);
1700         }
1701
1702         ether_addr_copy(vf_cfg->mac_addr, mac);
1703
1704         return 0;
1705 }
1706
1707 static int be_get_vf_config(struct net_device *netdev, int vf,
1708                             struct ifla_vf_info *vi)
1709 {
1710         struct be_adapter *adapter = netdev_priv(netdev);
1711         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1712
1713         if (!sriov_enabled(adapter))
1714                 return -EPERM;
1715
1716         if (vf >= adapter->num_vfs)
1717                 return -EINVAL;
1718
1719         vi->vf = vf;
1720         vi->max_tx_rate = vf_cfg->tx_rate;
1721         vi->min_tx_rate = 0;
1722         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1723         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1724         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1725         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1726         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1727
1728         return 0;
1729 }
1730
1731 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1732 {
1733         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1734         u16 vids[BE_NUM_VLANS_SUPPORTED];
1735         int vf_if_id = vf_cfg->if_handle;
1736         int status;
1737
1738         /* Enable Transparent VLAN Tagging */
1739         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1740         if (status)
1741                 return status;
1742
1743         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1744         vids[0] = 0;
1745         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1746         if (!status)
1747                 dev_info(&adapter->pdev->dev,
1748                          "Cleared guest VLANs on VF%d", vf);
1749
1750         /* After TVT is enabled, disallow VFs to program VLAN filters */
1751         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1752                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1753                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1754                 if (!status)
1755                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1756         }
1757         return 0;
1758 }
1759
1760 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1761 {
1762         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1763         struct device *dev = &adapter->pdev->dev;
1764         int status;
1765
1766         /* Reset Transparent VLAN Tagging. */
1767         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1768                                        vf_cfg->if_handle, 0, 0);
1769         if (status)
1770                 return status;
1771
1772         /* Allow VFs to program VLAN filtering */
1773         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1774                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1775                                                   BE_PRIV_FILTMGMT, vf + 1);
1776                 if (!status) {
1777                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1778                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1779                 }
1780         }
1781
1782         dev_info(dev,
1783                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1784         return 0;
1785 }
1786
1787 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1788 {
1789         struct be_adapter *adapter = netdev_priv(netdev);
1790         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1791         int status;
1792
1793         if (!sriov_enabled(adapter))
1794                 return -EPERM;
1795
1796         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1797                 return -EINVAL;
1798
1799         if (vlan || qos) {
1800                 vlan |= qos << VLAN_PRIO_SHIFT;
1801                 status = be_set_vf_tvt(adapter, vf, vlan);
1802         } else {
1803                 status = be_clear_vf_tvt(adapter, vf);
1804         }
1805
1806         if (status) {
1807                 dev_err(&adapter->pdev->dev,
1808                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1809                         status);
1810                 return be_cmd_status(status);
1811         }
1812
1813         vf_cfg->vlan_tag = vlan;
1814         return 0;
1815 }
1816
1817 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1818                              int min_tx_rate, int max_tx_rate)
1819 {
1820         struct be_adapter *adapter = netdev_priv(netdev);
1821         struct device *dev = &adapter->pdev->dev;
1822         int percent_rate, status = 0;
1823         u16 link_speed = 0;
1824         u8 link_status;
1825
1826         if (!sriov_enabled(adapter))
1827                 return -EPERM;
1828
1829         if (vf >= adapter->num_vfs)
1830                 return -EINVAL;
1831
1832         if (min_tx_rate)
1833                 return -EINVAL;
1834
1835         if (!max_tx_rate)
1836                 goto config_qos;
1837
1838         status = be_cmd_link_status_query(adapter, &link_speed,
1839                                           &link_status, 0);
1840         if (status)
1841                 goto err;
1842
1843         if (!link_status) {
1844                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1845                 status = -ENETDOWN;
1846                 goto err;
1847         }
1848
1849         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1850                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1851                         link_speed);
1852                 status = -EINVAL;
1853                 goto err;
1854         }
1855
1856         /* On Skyhawk the QOS setting must be done only as a % value */
1857         percent_rate = link_speed / 100;
1858         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1859                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1860                         percent_rate);
1861                 status = -EINVAL;
1862                 goto err;
1863         }
1864
1865 config_qos:
1866         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1867         if (status)
1868                 goto err;
1869
1870         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1871         return 0;
1872
1873 err:
1874         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1875                 max_tx_rate, vf);
1876         return be_cmd_status(status);
1877 }
1878
1879 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1880                                 int link_state)
1881 {
1882         struct be_adapter *adapter = netdev_priv(netdev);
1883         int status;
1884
1885         if (!sriov_enabled(adapter))
1886                 return -EPERM;
1887
1888         if (vf >= adapter->num_vfs)
1889                 return -EINVAL;
1890
1891         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
1892         if (status) {
1893                 dev_err(&adapter->pdev->dev,
1894                         "Link state change on VF %d failed: %#x\n", vf, status);
1895                 return be_cmd_status(status);
1896         }
1897
1898         adapter->vf_cfg[vf].plink_tracking = link_state;
1899
1900         return 0;
1901 }
1902
1903 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
1904 {
1905         struct be_adapter *adapter = netdev_priv(netdev);
1906         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1907         u8 spoofchk;
1908         int status;
1909
1910         if (!sriov_enabled(adapter))
1911                 return -EPERM;
1912
1913         if (vf >= adapter->num_vfs)
1914                 return -EINVAL;
1915
1916         if (BEx_chip(adapter))
1917                 return -EOPNOTSUPP;
1918
1919         if (enable == vf_cfg->spoofchk)
1920                 return 0;
1921
1922         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
1923
1924         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
1925                                        0, spoofchk);
1926         if (status) {
1927                 dev_err(&adapter->pdev->dev,
1928                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
1929                 return be_cmd_status(status);
1930         }
1931
1932         vf_cfg->spoofchk = enable;
1933         return 0;
1934 }
1935
1936 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
1937                           ulong now)
1938 {
1939         aic->rx_pkts_prev = rx_pkts;
1940         aic->tx_reqs_prev = tx_pkts;
1941         aic->jiffies = now;
1942 }
1943
1944 static int be_get_new_eqd(struct be_eq_obj *eqo)
1945 {
1946         struct be_adapter *adapter = eqo->adapter;
1947         int eqd, start;
1948         struct be_aic_obj *aic;
1949         struct be_rx_obj *rxo;
1950         struct be_tx_obj *txo;
1951         u64 rx_pkts = 0, tx_pkts = 0;
1952         ulong now;
1953         u32 pps, delta;
1954         int i;
1955
1956         aic = &adapter->aic_obj[eqo->idx];
1957         if (!aic->enable) {
1958                 if (aic->jiffies)
1959                         aic->jiffies = 0;
1960                 eqd = aic->et_eqd;
1961                 return eqd;
1962         }
1963
1964         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
1965                 do {
1966                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
1967                         rx_pkts += rxo->stats.rx_pkts;
1968                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
1969         }
1970
1971         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
1972                 do {
1973                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
1974                         tx_pkts += txo->stats.tx_reqs;
1975                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
1976         }
1977
1978         /* Skip if the counters wrapped around or this is the first calculation */
1979         now = jiffies;
1980         if (!aic->jiffies || time_before(now, aic->jiffies) ||
1981             rx_pkts < aic->rx_pkts_prev ||
1982             tx_pkts < aic->tx_reqs_prev) {
1983                 be_aic_update(aic, rx_pkts, tx_pkts, now);
1984                 return aic->prev_eqd;
1985         }
1986
1987         delta = jiffies_to_msecs(now - aic->jiffies);
1988         if (delta == 0)
1989                 return aic->prev_eqd;
1990
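             /* Derive a new delay from the aggregate RX+TX packet rate: higher
              * pps yields a larger eqd, clamped to [min_eqd, max_eqd]. The
              * scaling constants below are the driver's tuning heuristics.
              */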
1991         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
1992                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
1993         eqd = (pps / 15000) << 2;
1994
1995         if (eqd < 8)
1996                 eqd = 0;
1997         eqd = min_t(u32, eqd, aic->max_eqd);
1998         eqd = max_t(u32, eqd, aic->min_eqd);
1999
2000         be_aic_update(aic, rx_pkts, tx_pkts, now);
2001
2002         return eqd;
2003 }
2004
2005 /* For Skyhawk-R only */
2006 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2007 {
2008         struct be_adapter *adapter = eqo->adapter;
2009         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2010         ulong now = jiffies;
2011         int eqd;
2012         u32 mult_enc;
2013
2014         if (!aic->enable)
2015                 return 0;
2016
2017         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2018                 eqd = aic->prev_eqd;
2019         else
2020                 eqd = be_get_new_eqd(eqo);
2021
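             /* Map the adaptive delay into one of the coarse R2I delay-multiplier
              * encodings for this EQ.
              */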
2022         if (eqd > 100)
2023                 mult_enc = R2I_DLY_ENC_1;
2024         else if (eqd > 60)
2025                 mult_enc = R2I_DLY_ENC_2;
2026         else if (eqd > 20)
2027                 mult_enc = R2I_DLY_ENC_3;
2028         else
2029                 mult_enc = R2I_DLY_ENC_0;
2030
2031         aic->prev_eqd = eqd;
2032
2033         return mult_enc;
2034 }
2035
2036 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2037 {
2038         struct be_set_eqd set_eqd[MAX_EVT_QS];
2039         struct be_aic_obj *aic;
2040         struct be_eq_obj *eqo;
2041         int i, num = 0, eqd;
2042
2043         for_all_evt_queues(adapter, eqo, i) {
2044                 aic = &adapter->aic_obj[eqo->idx];
2045                 eqd = be_get_new_eqd(eqo);
2046                 if (force_update || eqd != aic->prev_eqd) {
2047                         set_eqd[num].delay_multiplier = (eqd * 65) / 100;
2048                         set_eqd[num].eq_id = eqo->q.id;
2049                         aic->prev_eqd = eqd;
2050                         num++;
2051                 }
2052         }
2053
2054         if (num)
2055                 be_cmd_modify_eqd(adapter, set_eqd, num);
2056 }
2057
2058 static void be_rx_stats_update(struct be_rx_obj *rxo,
2059                                struct be_rx_compl_info *rxcp)
2060 {
2061         struct be_rx_stats *stats = rx_stats(rxo);
2062
2063         u64_stats_update_begin(&stats->sync);
2064         stats->rx_compl++;
2065         stats->rx_bytes += rxcp->pkt_size;
2066         stats->rx_pkts++;
2067         if (rxcp->tunneled)
2068                 stats->rx_vxlan_offload_pkts++;
2069         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2070                 stats->rx_mcast_pkts++;
2071         if (rxcp->err)
2072                 stats->rx_compl_err++;
2073         u64_stats_update_end(&stats->sync);
2074 }
2075
2076 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2077 {
2078         /* L4 checksum is not reliable for non-TCP/UDP packets.
2079          * Also ignore ipcksm for IPv6 packets.
2080          */
2081         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2082                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2083 }
2084
2085 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2086 {
2087         struct be_adapter *adapter = rxo->adapter;
2088         struct be_rx_page_info *rx_page_info;
2089         struct be_queue_info *rxq = &rxo->q;
2090         u32 frag_idx = rxq->tail;
2091
2092         rx_page_info = &rxo->page_info_tbl[frag_idx];
2093         BUG_ON(!rx_page_info->page);
2094
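             /* Only the fragment marked last_frag owns the DMA mapping for the
              * whole compound page and must unmap it; other fragments just need
              * a CPU sync of their rx_frag_size slice.
              */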
2095         if (rx_page_info->last_frag) {
2096                 dma_unmap_page(&adapter->pdev->dev,
2097                                dma_unmap_addr(rx_page_info, bus),
2098                                adapter->big_page_size, DMA_FROM_DEVICE);
2099                 rx_page_info->last_frag = false;
2100         } else {
2101                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2102                                         dma_unmap_addr(rx_page_info, bus),
2103                                         rx_frag_size, DMA_FROM_DEVICE);
2104         }
2105
2106         queue_tail_inc(rxq);
2107         atomic_dec(&rxq->used);
2108         return rx_page_info;
2109 }
2110
2111 /* Throw away the data in the Rx completion */
2112 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2113                                 struct be_rx_compl_info *rxcp)
2114 {
2115         struct be_rx_page_info *page_info;
2116         u16 i, num_rcvd = rxcp->num_rcvd;
2117
2118         for (i = 0; i < num_rcvd; i++) {
2119                 page_info = get_rx_page_info(rxo);
2120                 put_page(page_info->page);
2121                 memset(page_info, 0, sizeof(*page_info));
2122         }
2123 }
2124
2125 /*
2126  * skb_fill_rx_data forms a complete skb for an ether frame
2127  * indicated by rxcp.
2128  */
2129 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2130                              struct be_rx_compl_info *rxcp)
2131 {
2132         struct be_rx_page_info *page_info;
2133         u16 i, j;
2134         u16 hdr_len, curr_frag_len, remaining;
2135         u8 *start;
2136
2137         page_info = get_rx_page_info(rxo);
2138         start = page_address(page_info->page) + page_info->page_offset;
2139         prefetch(start);
2140
2141         /* Copy data in the first descriptor of this completion */
2142         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2143
2144         skb->len = curr_frag_len;
2145         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2146                 memcpy(skb->data, start, curr_frag_len);
2147                 /* Complete packet has now been moved to data */
2148                 put_page(page_info->page);
2149                 skb->data_len = 0;
2150                 skb->tail += curr_frag_len;
2151         } else {
2152                 hdr_len = ETH_HLEN;
2153                 memcpy(skb->data, start, hdr_len);
2154                 skb_shinfo(skb)->nr_frags = 1;
2155                 skb_frag_set_page(skb, 0, page_info->page);
2156                 skb_shinfo(skb)->frags[0].page_offset =
2157                                         page_info->page_offset + hdr_len;
2158                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2159                                   curr_frag_len - hdr_len);
2160                 skb->data_len = curr_frag_len - hdr_len;
2161                 skb->truesize += rx_frag_size;
2162                 skb->tail += hdr_len;
2163         }
2164         page_info->page = NULL;
2165
2166         if (rxcp->pkt_size <= rx_frag_size) {
2167                 BUG_ON(rxcp->num_rcvd != 1);
2168                 return;
2169         }
2170
2171         /* More frags present for this completion */
2172         remaining = rxcp->pkt_size - curr_frag_len;
2173         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2174                 page_info = get_rx_page_info(rxo);
2175                 curr_frag_len = min(remaining, rx_frag_size);
2176
2177                 /* Coalesce all frags from the same physical page in one slot */
2178                 if (page_info->page_offset == 0) {
2179                         /* Fresh page */
2180                         j++;
2181                         skb_frag_set_page(skb, j, page_info->page);
2182                         skb_shinfo(skb)->frags[j].page_offset =
2183                                                         page_info->page_offset;
2184                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2185                         skb_shinfo(skb)->nr_frags++;
2186                 } else {
2187                         put_page(page_info->page);
2188                 }
2189
2190                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2191                 skb->len += curr_frag_len;
2192                 skb->data_len += curr_frag_len;
2193                 skb->truesize += rx_frag_size;
2194                 remaining -= curr_frag_len;
2195                 page_info->page = NULL;
2196         }
2197         BUG_ON(j > MAX_SKB_FRAGS);
2198 }
2199
2200 /* Process the RX completion indicated by rxcp when GRO is disabled */
2201 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2202                                 struct be_rx_compl_info *rxcp)
2203 {
2204         struct be_adapter *adapter = rxo->adapter;
2205         struct net_device *netdev = adapter->netdev;
2206         struct sk_buff *skb;
2207
2208         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2209         if (unlikely(!skb)) {
2210                 rx_stats(rxo)->rx_drops_no_skbs++;
2211                 be_rx_compl_discard(rxo, rxcp);
2212                 return;
2213         }
2214
2215         skb_fill_rx_data(rxo, skb, rxcp);
2216
2217         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2218                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2219         else
2220                 skb_checksum_none_assert(skb);
2221
2222         skb->protocol = eth_type_trans(skb, netdev);
2223         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2224         if (netdev->features & NETIF_F_RXHASH)
2225                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2226
2227         skb->csum_level = rxcp->tunneled;
2228         skb_mark_napi_id(skb, napi);
2229
2230         if (rxcp->vlanf)
2231                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2232
2233         netif_receive_skb(skb);
2234 }
2235
2236 /* Process the RX completion indicated by rxcp when GRO is enabled */
2237 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2238                                     struct napi_struct *napi,
2239                                     struct be_rx_compl_info *rxcp)
2240 {
2241         struct be_adapter *adapter = rxo->adapter;
2242         struct be_rx_page_info *page_info;
2243         struct sk_buff *skb = NULL;
2244         u16 remaining, curr_frag_len;
2245         u16 i, j;
2246
2247         skb = napi_get_frags(napi);
2248         if (!skb) {
2249                 be_rx_compl_discard(rxo, rxcp);
2250                 return;
2251         }
2252
2253         remaining = rxcp->pkt_size;
2254         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2255                 page_info = get_rx_page_info(rxo);
2256
2257                 curr_frag_len = min(remaining, rx_frag_size);
2258
2259                 /* Coalesce all frags from the same physical page in one slot */
2260                 if (i == 0 || page_info->page_offset == 0) {
2261                         /* First frag or Fresh page */
2262                         j++;
2263                         skb_frag_set_page(skb, j, page_info->page);
2264                         skb_shinfo(skb)->frags[j].page_offset =
2265                                                         page_info->page_offset;
2266                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2267                 } else {
2268                         put_page(page_info->page);
2269                 }
2270                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2271                 skb->truesize += rx_frag_size;
2272                 remaining -= curr_frag_len;
2273                 memset(page_info, 0, sizeof(*page_info));
2274         }
2275         BUG_ON(j > MAX_SKB_FRAGS);
2276
2277         skb_shinfo(skb)->nr_frags = j + 1;
2278         skb->len = rxcp->pkt_size;
2279         skb->data_len = rxcp->pkt_size;
2280         skb->ip_summed = CHECKSUM_UNNECESSARY;
2281         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2282         if (adapter->netdev->features & NETIF_F_RXHASH)
2283                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2284
2285         skb->csum_level = rxcp->tunneled;
2286
2287         if (rxcp->vlanf)
2288                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2289
2290         napi_gro_frags(napi);
2291 }
2292
2293 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2294                                  struct be_rx_compl_info *rxcp)
2295 {
2296         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2297         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2298         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2299         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2300         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2301         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2302         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2303         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2304         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2305         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2306         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2307         if (rxcp->vlanf) {
2308                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2309                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2310         }
2311         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2312         rxcp->tunneled =
2313                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2314 }
2315
2316 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2317                                  struct be_rx_compl_info *rxcp)
2318 {
2319         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2320         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2321         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2322         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2323         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2324         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2325         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2326         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2327         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2328         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2329         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2330         if (rxcp->vlanf) {
2331                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2332                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2333         }
2334         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2335         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2336 }
2337
2338 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2339 {
2340         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2341         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2342         struct be_adapter *adapter = rxo->adapter;
2343
2344         /* For checking the valid bit it is OK to use either definition, as the
2345          * valid bit is at the same position in both v0 and v1 Rx compl */
2346         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2347                 return NULL;
2348
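             /* Ensure the valid bit is read before the rest of the completion
              * entry, mirroring the barrier on the TX completion path.
              */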
2349         rmb();
2350         be_dws_le_to_cpu(compl, sizeof(*compl));
2351
2352         if (adapter->be3_native)
2353                 be_parse_rx_compl_v1(compl, rxcp);
2354         else
2355                 be_parse_rx_compl_v0(compl, rxcp);
2356
2357         if (rxcp->ip_frag)
2358                 rxcp->l4_csum = 0;
2359
2360         if (rxcp->vlanf) {
2361                 /* In QNQ modes, if qnq bit is not set, then the packet was
2362                  * tagged only with the transparent outer vlan-tag and must
2363                  * not be treated as a vlan packet by host
2364                  */
2365                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2366                         rxcp->vlanf = 0;
2367
2368                 if (!lancer_chip(adapter))
2369                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2370
2371                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2372                     !test_bit(rxcp->vlan_tag, adapter->vids))
2373                         rxcp->vlanf = 0;
2374         }
2375
2376         /* As the compl has been parsed, reset it; we won't touch it again */
2377         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2378
2379         queue_tail_inc(&rxo->cq);
2380         return rxcp;
2381 }
2382
2383 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2384 {
2385         u32 order = get_order(size);
2386
2387         if (order > 0)
2388                 gfp |= __GFP_COMP;
2389         return  alloc_pages(gfp, order);
2390 }
2391
2392 /*
2393  * Allocate a page, split it into fragments of size rx_frag_size and post them
2394  * as receive buffers to BE
2395  */
2396 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2397 {
2398         struct be_adapter *adapter = rxo->adapter;
2399         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2400         struct be_queue_info *rxq = &rxo->q;
2401         struct page *pagep = NULL;
2402         struct device *dev = &adapter->pdev->dev;
2403         struct be_eth_rx_d *rxd;
2404         u64 page_dmaaddr = 0, frag_dmaaddr;
2405         u32 posted, page_offset = 0, notify = 0;
2406
2407         page_info = &rxo->page_info_tbl[rxq->head];
2408         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2409                 if (!pagep) {
2410                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2411                         if (unlikely(!pagep)) {
2412                                 rx_stats(rxo)->rx_post_fail++;
2413                                 break;
2414                         }
2415                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2416                                                     adapter->big_page_size,
2417                                                     DMA_FROM_DEVICE);
2418                         if (dma_mapping_error(dev, page_dmaaddr)) {
2419                                 put_page(pagep);
2420                                 pagep = NULL;
2421                                 adapter->drv_stats.dma_map_errors++;
2422                                 break;
2423                         }
2424                         page_offset = 0;
2425                 } else {
2426                         get_page(pagep);
2427                         page_offset += rx_frag_size;
2428                 }
2429                 page_info->page_offset = page_offset;
2430                 page_info->page = pagep;
2431
2432                 rxd = queue_head_node(rxq);
2433                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2434                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2435                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2436
2437                 /* Any space left in the current big page for another frag? */
2438                 if ((page_offset + rx_frag_size + rx_frag_size) >
2439                                         adapter->big_page_size) {
2440                         pagep = NULL;
2441                         page_info->last_frag = true;
2442                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2443                 } else {
2444                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2445                 }
2446
2447                 prev_page_info = page_info;
2448                 queue_head_inc(rxq);
2449                 page_info = &rxo->page_info_tbl[rxq->head];
2450         }
2451
2452         /* Mark the last frag of a page when we break out of the above loop
2453          * with no more slots available in the RXQ
2454          */
2455         if (pagep) {
2456                 prev_page_info->last_frag = true;
2457                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2458         }
2459
2460         if (posted) {
2461                 atomic_add(posted, &rxq->used);
2462                 if (rxo->rx_post_starved)
2463                         rxo->rx_post_starved = false;
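                     /* The ERX doorbell can convey at most MAX_NUM_POST_ERX_DB new
                      * buffers per write, so notify the HW in chunks.
                      */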
2464                 do {
2465                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2466                         be_rxq_notify(adapter, rxq->id, notify);
2467                         posted -= notify;
2468                 } while (posted);
2469         } else if (atomic_read(&rxq->used) == 0) {
2470                 /* Let be_worker replenish when memory is available */
2471                 rxo->rx_post_starved = true;
2472         }
2473 }
2474
2475 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2476 {
2477         struct be_queue_info *tx_cq = &txo->cq;
2478         struct be_tx_compl_info *txcp = &txo->txcp;
2479         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2480
2481         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2482                 return NULL;
2483
2484         /* Ensure load ordering of valid bit dword and other dwords below */
2485         rmb();
2486         be_dws_le_to_cpu(compl, sizeof(*compl));
2487
2488         txcp->status = GET_TX_COMPL_BITS(status, compl);
2489         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2490
2491         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2492         queue_tail_inc(tx_cq);
2493         return txcp;
2494 }
2495
2496 static u16 be_tx_compl_process(struct be_adapter *adapter,
2497                                struct be_tx_obj *txo, u16 last_index)
2498 {
2499         struct sk_buff **sent_skbs = txo->sent_skb_list;
2500         struct be_queue_info *txq = &txo->q;
2501         struct sk_buff *skb = NULL;
2502         bool unmap_skb_hdr = false;
2503         struct be_eth_wrb *wrb;
2504         u16 num_wrbs = 0;
2505         u32 frag_index;
2506
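             /* Walk the TX ring from its tail up to last_index, unmapping each
              * WRB and freeing the skb(s) that were posted; returns the number
              * of WRBs reclaimed so the caller can adjust the queue count.
              */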
2507         do {
2508                 if (sent_skbs[txq->tail]) {
2509                         /* Free skb from prev req */
2510                         if (skb)
2511                                 dev_consume_skb_any(skb);
2512                         skb = sent_skbs[txq->tail];
2513                         sent_skbs[txq->tail] = NULL;
2514                         queue_tail_inc(txq);  /* skip hdr wrb */
2515                         num_wrbs++;
2516                         unmap_skb_hdr = true;
2517                 }
2518                 wrb = queue_tail_node(txq);
2519                 frag_index = txq->tail;
2520                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2521                               (unmap_skb_hdr && skb_headlen(skb)));
2522                 unmap_skb_hdr = false;
2523                 queue_tail_inc(txq);
2524                 num_wrbs++;
2525         } while (frag_index != last_index);
2526         dev_consume_skb_any(skb);
2527
2528         return num_wrbs;
2529 }
2530
2531 /* Return the number of events in the event queue */
2532 static inline int events_get(struct be_eq_obj *eqo)
2533 {
2534         struct be_eq_entry *eqe;
2535         int num = 0;
2536
2537         do {
2538                 eqe = queue_tail_node(&eqo->q);
2539                 if (eqe->evt == 0)
2540                         break;
2541
2542                 rmb();
2543                 eqe->evt = 0;
2544                 num++;
2545                 queue_tail_inc(&eqo->q);
2546         } while (true);
2547
2548         return num;
2549 }
2550
2551 /* Leaves the EQ in disarmed state */
2552 static void be_eq_clean(struct be_eq_obj *eqo)
2553 {
2554         int num = events_get(eqo);
2555
2556         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2557 }
2558
2559 /* Free posted rx buffers that were not used */
2560 static void be_rxq_clean(struct be_rx_obj *rxo)
2561 {
2562         struct be_queue_info *rxq = &rxo->q;
2563         struct be_rx_page_info *page_info;
2564
2565         while (atomic_read(&rxq->used) > 0) {
2566                 page_info = get_rx_page_info(rxo);
2567                 put_page(page_info->page);
2568                 memset(page_info, 0, sizeof(*page_info));
2569         }
2570         BUG_ON(atomic_read(&rxq->used));
2571         rxq->tail = 0;
2572         rxq->head = 0;
2573 }
2574
2575 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2576 {
2577         struct be_queue_info *rx_cq = &rxo->cq;
2578         struct be_rx_compl_info *rxcp;
2579         struct be_adapter *adapter = rxo->adapter;
2580         int flush_wait = 0;
2581
2582         /* Consume pending rx completions.
2583          * Wait for the flush completion (identified by zero num_rcvd)
2584          * to arrive. Notify CQ even when there are no more CQ entries
2585          * for HW to flush partially coalesced CQ entries.
2586          * In Lancer, there is no need to wait for flush compl.
2587          */
2588         for (;;) {
2589                 rxcp = be_rx_compl_get(rxo);
2590                 if (!rxcp) {
2591                         if (lancer_chip(adapter))
2592                                 break;
2593
2594                         if (flush_wait++ > 50 ||
2595                             be_check_error(adapter,
2596                                            BE_ERROR_HW)) {
2597                                 dev_warn(&adapter->pdev->dev,
2598                                          "did not receive flush compl\n");
2599                                 break;
2600                         }
2601                         be_cq_notify(adapter, rx_cq->id, true, 0);
2602                         mdelay(1);
2603                 } else {
2604                         be_rx_compl_discard(rxo, rxcp);
2605                         be_cq_notify(adapter, rx_cq->id, false, 1);
2606                         if (rxcp->num_rcvd == 0)
2607                                 break;
2608                 }
2609         }
2610
2611         /* After cleanup, leave the CQ in unarmed state */
2612         be_cq_notify(adapter, rx_cq->id, false, 0);
2613 }
2614
2615 static void be_tx_compl_clean(struct be_adapter *adapter)
2616 {
2617         struct device *dev = &adapter->pdev->dev;
2618         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2619         struct be_tx_compl_info *txcp;
2620         struct be_queue_info *txq;
2621         u32 end_idx, notified_idx;
2622         struct be_tx_obj *txo;
2623         int i, pending_txqs;
2624
2625         /* Stop polling for compls when HW has been silent for 10ms */
2626         do {
2627                 pending_txqs = adapter->num_tx_qs;
2628
2629                 for_all_tx_queues(adapter, txo, i) {
2630                         cmpl = 0;
2631                         num_wrbs = 0;
2632                         txq = &txo->q;
2633                         while ((txcp = be_tx_compl_get(txo))) {
2634                                 num_wrbs +=
2635                                         be_tx_compl_process(adapter, txo,
2636                                                             txcp->end_index);
2637                                 cmpl++;
2638                         }
2639                         if (cmpl) {
2640                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2641                                 atomic_sub(num_wrbs, &txq->used);
2642                                 timeo = 0;
2643                         }
2644                         if (!be_is_tx_compl_pending(txo))
2645                                 pending_txqs--;
2646                 }
2647
2648                 if (pending_txqs == 0 || ++timeo > 10 ||
2649                     be_check_error(adapter, BE_ERROR_HW))
2650                         break;
2651
2652                 mdelay(1);
2653         } while (true);
2654
2655         /* Free enqueued TX that was never notified to HW */
2656         for_all_tx_queues(adapter, txo, i) {
2657                 txq = &txo->q;
2658
2659                 if (atomic_read(&txq->used)) {
2660                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2661                                  i, atomic_read(&txq->used));
2662                         notified_idx = txq->tail;
2663                         end_idx = txq->tail;
2664                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2665                                   txq->len);
2666                         /* Use the tx-compl process logic to handle requests
2667                          * that were not sent to the HW.
2668                          */
2669                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2670                         atomic_sub(num_wrbs, &txq->used);
2671                         BUG_ON(atomic_read(&txq->used));
2672                         txo->pend_wrb_cnt = 0;
2673                         /* Since hw was never notified of these requests,
2674                          * reset TXQ indices
2675                          */
2676                         txq->head = notified_idx;
2677                         txq->tail = notified_idx;
2678                 }
2679         }
2680 }
2681
2682 static void be_evt_queues_destroy(struct be_adapter *adapter)
2683 {
2684         struct be_eq_obj *eqo;
2685         int i;
2686
2687         for_all_evt_queues(adapter, eqo, i) {
2688                 if (eqo->q.created) {
2689                         be_eq_clean(eqo);
2690                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2691                         napi_hash_del(&eqo->napi);
2692                         netif_napi_del(&eqo->napi);
2693                         free_cpumask_var(eqo->affinity_mask);
2694                 }
2695                 be_queue_free(adapter, &eqo->q);
2696         }
2697 }
2698
2699 static int be_evt_queues_create(struct be_adapter *adapter)
2700 {
2701         struct be_queue_info *eq;
2702         struct be_eq_obj *eqo;
2703         struct be_aic_obj *aic;
2704         int i, rc;
2705
2706         /* need enough EQs to service both RX and TX queues */
2707         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2708                                     max(adapter->cfg_num_rx_irqs,
2709                                         adapter->cfg_num_tx_irqs));
2710
2711         for_all_evt_queues(adapter, eqo, i) {
2712                 int numa_node = dev_to_node(&adapter->pdev->dev);
2713
2714                 aic = &adapter->aic_obj[i];
2715                 eqo->adapter = adapter;
2716                 eqo->idx = i;
2717                 aic->max_eqd = BE_MAX_EQD;
2718                 aic->enable = true;
2719
2720                 eq = &eqo->q;
2721                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2722                                     sizeof(struct be_eq_entry));
2723                 if (rc)
2724                         return rc;
2725
2726                 rc = be_cmd_eq_create(adapter, eqo);
2727                 if (rc)
2728                         return rc;
2729
2730                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2731                         return -ENOMEM;
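                     /* Build a preferred-CPU mask for this EQ, spreading EQs over
                      * CPUs close to the adapter's NUMA node; the mask is used for
                      * XPS (see be_tx_qs_create) and as an IRQ affinity hint.
                      */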
2732                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2733                                 eqo->affinity_mask);
2734                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2735                                BE_NAPI_WEIGHT);
2736         }
2737         return 0;
2738 }
2739
2740 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2741 {
2742         struct be_queue_info *q;
2743
2744         q = &adapter->mcc_obj.q;
2745         if (q->created)
2746                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2747         be_queue_free(adapter, q);
2748
2749         q = &adapter->mcc_obj.cq;
2750         if (q->created)
2751                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2752         be_queue_free(adapter, q);
2753 }
2754
2755 /* Must be called only after TX qs are created as MCC shares TX EQ */
2756 static int be_mcc_queues_create(struct be_adapter *adapter)
2757 {
2758         struct be_queue_info *q, *cq;
2759
2760         cq = &adapter->mcc_obj.cq;
2761         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2762                            sizeof(struct be_mcc_compl)))
2763                 goto err;
2764
2765         /* Use the default EQ for MCC completions */
2766         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2767                 goto mcc_cq_free;
2768
2769         q = &adapter->mcc_obj.q;
2770         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2771                 goto mcc_cq_destroy;
2772
2773         if (be_cmd_mccq_create(adapter, q, cq))
2774                 goto mcc_q_free;
2775
2776         return 0;
2777
2778 mcc_q_free:
2779         be_queue_free(adapter, q);
2780 mcc_cq_destroy:
2781         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2782 mcc_cq_free:
2783         be_queue_free(adapter, cq);
2784 err:
2785         return -1;
2786 }
2787
2788 static void be_tx_queues_destroy(struct be_adapter *adapter)
2789 {
2790         struct be_queue_info *q;
2791         struct be_tx_obj *txo;
2792         u8 i;
2793
2794         for_all_tx_queues(adapter, txo, i) {
2795                 q = &txo->q;
2796                 if (q->created)
2797                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2798                 be_queue_free(adapter, q);
2799
2800                 q = &txo->cq;
2801                 if (q->created)
2802                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2803                 be_queue_free(adapter, q);
2804         }
2805 }
2806
2807 static int be_tx_qs_create(struct be_adapter *adapter)
2808 {
2809         struct be_queue_info *cq;
2810         struct be_tx_obj *txo;
2811         struct be_eq_obj *eqo;
2812         int status, i;
2813
2814         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2815
2816         for_all_tx_queues(adapter, txo, i) {
2817                 cq = &txo->cq;
2818                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2819                                         sizeof(struct be_eth_tx_compl));
2820                 if (status)
2821                         return status;
2822
2823                 u64_stats_init(&txo->stats.sync);
2824                 u64_stats_init(&txo->stats.sync_compl);
2825
2826                 /* If num_evt_qs is less than num_tx_qs, then more than
2827                  * one txq shares an eq
2828                  */
2829                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2830                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2831                 if (status)
2832                         return status;
2833
2834                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2835                                         sizeof(struct be_eth_wrb));
2836                 if (status)
2837                         return status;
2838
2839                 status = be_cmd_txq_create(adapter, txo);
2840                 if (status)
2841                         return status;
2842
2843                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2844                                     eqo->idx);
2845         }
2846
2847         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2848                  adapter->num_tx_qs);
2849         return 0;
2850 }
2851
2852 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2853 {
2854         struct be_queue_info *q;
2855         struct be_rx_obj *rxo;
2856         int i;
2857
2858         for_all_rx_queues(adapter, rxo, i) {
2859                 q = &rxo->cq;
2860                 if (q->created)
2861                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2862                 be_queue_free(adapter, q);
2863         }
2864 }
2865
2866 static int be_rx_cqs_create(struct be_adapter *adapter)
2867 {
2868         struct be_queue_info *eq, *cq;
2869         struct be_rx_obj *rxo;
2870         int rc, i;
2871
2872         adapter->num_rss_qs =
2873                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2874
2875         /* We'll use RSS only if at least 2 RSS rings are supported. */
2876         if (adapter->num_rss_qs < 2)
2877                 adapter->num_rss_qs = 0;
2878
2879         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2880
2881         /* When the interface is not capable of RSS rings (and there is no
2882          * need to create a default RXQ) we'll still need one RXQ
2883          */
2884         if (adapter->num_rx_qs == 0)
2885                 adapter->num_rx_qs = 1;
2886
2887         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2888         for_all_rx_queues(adapter, rxo, i) {
2889                 rxo->adapter = adapter;
2890                 cq = &rxo->cq;
2891                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2892                                     sizeof(struct be_eth_rx_compl));
2893                 if (rc)
2894                         return rc;
2895
2896                 u64_stats_init(&rxo->stats.sync);
2897                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
2898                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
2899                 if (rc)
2900                         return rc;
2901         }
2902
2903         dev_info(&adapter->pdev->dev,
2904                  "created %d RX queue(s)\n", adapter->num_rx_qs);
2905         return 0;
2906 }
2907
2908 static irqreturn_t be_intx(int irq, void *dev)
2909 {
2910         struct be_eq_obj *eqo = dev;
2911         struct be_adapter *adapter = eqo->adapter;
2912         int num_evts = 0;
2913
2914         /* IRQ is not expected when NAPI is scheduled as the EQ
2915          * will not be armed.
2916          * But, this can happen on Lancer INTx where it takes
2917          * a while to de-assert INTx or in BE2 where occasionally
2918          * an interrupt may be raised even when EQ is unarmed.
2919          * If NAPI is already scheduled, then counting & notifying
2920          * events will orphan them.
2921          */
2922         if (napi_schedule_prep(&eqo->napi)) {
2923                 num_evts = events_get(eqo);
2924                 __napi_schedule(&eqo->napi);
2925                 if (num_evts)
2926                         eqo->spurious_intr = 0;
2927         }
2928         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
2929
2930         /* Return IRQ_HANDLED only for the first spurious intr
2931          * after a valid intr to stop the kernel from branding
2932          * this irq as a bad one!
2933          */
2934         if (num_evts || eqo->spurious_intr++ == 0)
2935                 return IRQ_HANDLED;
2936         else
2937                 return IRQ_NONE;
2938 }
2939
2940 static irqreturn_t be_msix(int irq, void *dev)
2941 {
2942         struct be_eq_obj *eqo = dev;
2943
2944         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
2945         napi_schedule(&eqo->napi);
2946         return IRQ_HANDLED;
2947 }
2948
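     /* GRO is used only for error-free TCP completions that passed the L4
      * checksum check.
      */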
2949 static inline bool do_gro(struct be_rx_compl_info *rxcp)
2950 {
2951         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
2952 }
2953
2954 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
2955                          int budget, int polling)
2956 {
2957         struct be_adapter *adapter = rxo->adapter;
2958         struct be_queue_info *rx_cq = &rxo->cq;
2959         struct be_rx_compl_info *rxcp;
2960         u32 work_done;
2961         u32 frags_consumed = 0;
2962
2963         for (work_done = 0; work_done < budget; work_done++) {
2964                 rxcp = be_rx_compl_get(rxo);
2965                 if (!rxcp)
2966                         break;
2967
2968                 /* Is it a flush compl that has no data */
2969                 if (unlikely(rxcp->num_rcvd == 0))
2970                         goto loop_continue;
2971
2972                 /* Discard compl with partial DMA Lancer B0 */
2973                 if (unlikely(!rxcp->pkt_size)) {
2974                         be_rx_compl_discard(rxo, rxcp);
2975                         goto loop_continue;
2976                 }
2977
2978                 /* On BE drop pkts that arrive due to imperfect filtering in
2979                  * promiscuous mode on some SKUs
2980                  */
2981                 if (unlikely(rxcp->port != adapter->port_num &&
2982                              !lancer_chip(adapter))) {
2983                         be_rx_compl_discard(rxo, rxcp);
2984                         goto loop_continue;
2985                 }
2986
2987                 /* Don't do gro when we're busy_polling */
2988                 if (do_gro(rxcp) && polling != BUSY_POLLING)
2989                         be_rx_compl_process_gro(rxo, napi, rxcp);
2990                 else
2991                         be_rx_compl_process(rxo, napi, rxcp);
2992
2993 loop_continue:
2994                 frags_consumed += rxcp->num_rcvd;
2995                 be_rx_stats_update(rxo, rxcp);
2996         }
2997
2998         if (work_done) {
2999                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3000
3001                 /* When an rx-obj gets into post_starved state, just
3002                  * let be_worker do the posting.
3003                  */
3004                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3005                     !rxo->rx_post_starved)
3006                         be_post_rx_frags(rxo, GFP_ATOMIC,
3007                                          max_t(u32, MAX_RX_POST,
3008                                                frags_consumed));
3009         }
3010
3011         return work_done;
3012 }
3013
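     /* Map a TX completion error status to the matching per-queue error
      * counter (BE and Lancer report different status codes).
      */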
3014 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3015 {
3016         switch (status) {
3017         case BE_TX_COMP_HDR_PARSE_ERR:
3018                 tx_stats(txo)->tx_hdr_parse_err++;
3019                 break;
3020         case BE_TX_COMP_NDMA_ERR:
3021                 tx_stats(txo)->tx_dma_err++;
3022                 break;
3023         case BE_TX_COMP_ACL_ERR:
3024                 tx_stats(txo)->tx_spoof_check_err++;
3025                 break;
3026         }
3027 }
3028
3029 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3030 {
3031         switch (status) {
3032         case LANCER_TX_COMP_LSO_ERR:
3033                 tx_stats(txo)->tx_tso_err++;
3034                 break;
3035         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3036         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3037                 tx_stats(txo)->tx_spoof_check_err++;
3038                 break;
3039         case LANCER_TX_COMP_QINQ_ERR:
3040                 tx_stats(txo)->tx_qinq_err++;
3041                 break;
3042         case LANCER_TX_COMP_PARITY_ERR:
3043                 tx_stats(txo)->tx_internal_parity_err++;
3044                 break;
3045         case LANCER_TX_COMP_DMA_ERR:
3046                 tx_stats(txo)->tx_dma_err++;
3047                 break;
3048         }
3049 }
3050
3051 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3052                           int idx)
3053 {
3054         int num_wrbs = 0, work_done = 0;
3055         struct be_tx_compl_info *txcp;
3056
3057         while ((txcp = be_tx_compl_get(txo))) {
3058                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3059                 work_done++;
3060
3061                 if (txcp->status) {
3062                         if (lancer_chip(adapter))
3063                                 lancer_update_tx_err(txo, txcp->status);
3064                         else
3065                                 be_update_tx_err(txo, txcp->status);
3066                 }
3067         }
3068
3069         if (work_done) {
3070                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3071                 atomic_sub(num_wrbs, &txo->q.used);
3072
3073                 /* As Tx wrbs have been freed up, wake up netdev queue
3074                  * if it was stopped due to lack of tx wrbs.  */
3075                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3076                     be_can_txq_wake(txo)) {
3077                         netif_wake_subqueue(adapter->netdev, idx);
3078                 }
3079
3080                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3081                 tx_stats(txo)->tx_compl += work_done;
3082                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3083         }
3084 }
3085
3086 #ifdef CONFIG_NET_RX_BUSY_POLL
3087 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3088 {
3089         bool status = true;
3090
3091         spin_lock(&eqo->lock); /* BH is already disabled */
3092         if (eqo->state & BE_EQ_LOCKED) {
3093                 WARN_ON(eqo->state & BE_EQ_NAPI);
3094                 eqo->state |= BE_EQ_NAPI_YIELD;
3095                 status = false;
3096         } else {
3097                 eqo->state = BE_EQ_NAPI;
3098         }
3099         spin_unlock(&eqo->lock);
3100         return status;
3101 }
3102
3103 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3104 {
3105         spin_lock(&eqo->lock); /* BH is already disabled */
3106
3107         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3108         eqo->state = BE_EQ_IDLE;
3109
3110         spin_unlock(&eqo->lock);
3111 }
3112
3113 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3114 {
3115         bool status = true;
3116
3117         spin_lock_bh(&eqo->lock);
3118         if (eqo->state & BE_EQ_LOCKED) {
3119                 eqo->state |= BE_EQ_POLL_YIELD;
3120                 status = false;
3121         } else {
3122                 eqo->state |= BE_EQ_POLL;
3123         }
3124         spin_unlock_bh(&eqo->lock);
3125         return status;
3126 }
3127
3128 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3129 {
3130         spin_lock_bh(&eqo->lock);
3131
3132         WARN_ON(eqo->state & (BE_EQ_NAPI));
3133         eqo->state = BE_EQ_IDLE;
3134
3135         spin_unlock_bh(&eqo->lock);
3136 }
3137
3138 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3139 {
3140         spin_lock_init(&eqo->lock);
3141         eqo->state = BE_EQ_IDLE;
3142 }
3143
3144 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3145 {
3146         local_bh_disable();
3147
3148         /* It's enough to just acquire napi lock on the eqo to stop
3149          * be_busy_poll() from processing any queues.
3150          */
3151         while (!be_lock_napi(eqo))
3152                 mdelay(1);
3153
3154         local_bh_enable();
3155 }
3156
3157 #else /* CONFIG_NET_RX_BUSY_POLL */
3158
3159 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3160 {
3161         return true;
3162 }
3163
3164 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3165 {
3166 }
3167
3168 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3169 {
3170         return false;
3171 }
3172
3173 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3174 {
3175 }
3176
3177 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3178 {
3179 }
3180
3181 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3182 {
3183 }
3184 #endif /* CONFIG_NET_RX_BUSY_POLL */
3185
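     /* NAPI poll handler: drain TX completions for all TX queues on this EQ,
      * process RX up to the budget, service the MCC queue on the MCC EQ, and
      * re-arm the EQ only when all work fits within the budget.
      */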
3186 int be_poll(struct napi_struct *napi, int budget)
3187 {
3188         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3189         struct be_adapter *adapter = eqo->adapter;
3190         int max_work = 0, work, i, num_evts;
3191         struct be_rx_obj *rxo;
3192         struct be_tx_obj *txo;
3193         u32 mult_enc = 0;
3194
3195         num_evts = events_get(eqo);
3196
3197         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3198                 be_process_tx(adapter, txo, i);
3199
3200         if (be_lock_napi(eqo)) {
3201                 /* This loop will iterate twice for EQ0 in which
3202                  * completions of the last RXQ (default one) are also processed.
3203                  * For other EQs the loop iterates only once.
3204                  */
3205                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3206                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3207                         max_work = max(work, max_work);
3208                 }
3209                 be_unlock_napi(eqo);
3210         } else {
3211                 max_work = budget;
3212         }
3213
3214         if (is_mcc_eqo(eqo))
3215                 be_process_mcc(adapter);
3216
3217         if (max_work < budget) {
3218                 napi_complete(napi);
3219
3220                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3221                  * delay via a delay multiplier encoding value
3222                  */
3223                 if (skyhawk_chip(adapter))
3224                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3225
3226                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3227                              mult_enc);
3228         } else {
3229                 /* As we'll continue in polling mode, count and clear events */
3230                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3231         }
3232         return max_work;
3233 }
3234
3235 #ifdef CONFIG_NET_RX_BUSY_POLL
3236 static int be_busy_poll(struct napi_struct *napi)
3237 {
3238         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3239         struct be_adapter *adapter = eqo->adapter;
3240         struct be_rx_obj *rxo;
3241         int i, work = 0;
3242
3243         if (!be_lock_busy_poll(eqo))
3244                 return LL_FLUSH_BUSY;
3245
3246         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3247                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3248                 if (work)
3249                         break;
3250         }
3251
3252         be_unlock_busy_poll(eqo);
3253         return work;
3254 }
3255 #endif
3256
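     /* Check the adapter for unrecoverable errors: Lancer reports them via
      * the SLIPORT status/error registers, while BE/Skyhawk expose them as
      * UE bits in the PCICFG UE status registers.
      */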
3257 void be_detect_error(struct be_adapter *adapter)
3258 {
3259         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3260         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3261         u32 i;
3262         struct device *dev = &adapter->pdev->dev;
3263
3264         if (be_check_error(adapter, BE_ERROR_HW))
3265                 return;
3266
3267         if (lancer_chip(adapter)) {
3268                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3269                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3270                         be_set_error(adapter, BE_ERROR_UE);
3271                         sliport_err1 = ioread32(adapter->db +
3272                                                 SLIPORT_ERROR1_OFFSET);
3273                         sliport_err2 = ioread32(adapter->db +
3274                                                 SLIPORT_ERROR2_OFFSET);
3275                         /* Do not log error messages if it's a FW reset */
3276                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3277                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3278                                 dev_info(dev, "Firmware update in progress\n");
3279                         } else {
3280                                 dev_err(dev, "Error detected in the card\n");
3281                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3282                                         sliport_status);
3283                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3284                                         sliport_err1);
3285                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3286                                         sliport_err2);
3287                         }
3288                 }
3289         } else {
3290                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3291                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3292                 ue_lo_mask = ioread32(adapter->pcicfg +
3293                                       PCICFG_UE_STATUS_LOW_MASK);
3294                 ue_hi_mask = ioread32(adapter->pcicfg +
3295                                       PCICFG_UE_STATUS_HI_MASK);
3296
3297                 ue_lo = (ue_lo & ~ue_lo_mask);
3298                 ue_hi = (ue_hi & ~ue_hi_mask);
3299
3300                 /* On certain platforms BE hardware can indicate spurious UEs.
3301                  * Allow HW to stop working completely in case of a real UE.
3302                  * Hence not setting the hw_error for UE detection.
3303                  */
3304
3305                 if (ue_lo || ue_hi) {
3306                         dev_err(dev,
3307                                 "Unrecoverable Error detected in the adapter\n");
3308                         dev_err(dev, "Please reboot server to recover\n");
3309                         if (skyhawk_chip(adapter))
3310                                 be_set_error(adapter, BE_ERROR_UE);
3311
3312                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3313                                 if (ue_lo & 1)
3314                                         dev_err(dev, "UE: %s bit set\n",
3315                                                 ue_status_low_desc[i]);
3316                         }
3317                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3318                                 if (ue_hi & 1)
3319                                         dev_err(dev, "UE: %s bit set\n",
3320                                                 ue_status_hi_desc[i]);
3321                         }
3322                 }
3323         }
3324 }
3325
3326 static void be_msix_disable(struct be_adapter *adapter)
3327 {
3328         if (msix_enabled(adapter)) {
3329                 pci_disable_msix(adapter->pdev);
3330                 adapter->num_msix_vec = 0;
3331                 adapter->num_msix_roce_vec = 0;
3332         }
3333 }
3334
3335 static int be_msix_enable(struct be_adapter *adapter)
3336 {
3337         unsigned int i, max_roce_eqs;
3338         struct device *dev = &adapter->pdev->dev;
3339         int num_vec;
3340
3341         /* If RoCE is supported, program the max number of vectors that
3342          * could be used for NIC and RoCE; otherwise, just program the number
3343          * we'll use initially.
3344          */
3345         if (be_roce_supported(adapter)) {
3346                 max_roce_eqs =
3347                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3348                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3349                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3350         } else {
3351                 num_vec = max(adapter->cfg_num_rx_irqs,
3352                               adapter->cfg_num_tx_irqs);
3353         }
3354
3355         for (i = 0; i < num_vec; i++)
3356                 adapter->msix_entries[i].entry = i;
3357
3358         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3359                                         MIN_MSIX_VECTORS, num_vec);
3360         if (num_vec < 0)
3361                 goto fail;
3362
3363         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3364                 adapter->num_msix_roce_vec = num_vec / 2;
3365                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3366                          adapter->num_msix_roce_vec);
3367         }
3368
3369         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3370
3371         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3372                  adapter->num_msix_vec);
3373         return 0;
3374
3375 fail:
3376         dev_warn(dev, "MSIx enable failed\n");
3377
3378         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3379         if (be_virtfn(adapter))
3380                 return num_vec;
3381         return 0;
3382 }
3383
3384 static inline int be_msix_vec_get(struct be_adapter *adapter,
3385                                   struct be_eq_obj *eqo)
3386 {
3387         return adapter->msix_entries[eqo->msix_idx].vector;
3388 }
3389
3390 static int be_msix_register(struct be_adapter *adapter)
3391 {
3392         struct net_device *netdev = adapter->netdev;
3393         struct be_eq_obj *eqo;
3394         int status, i, vec;
3395
3396         for_all_evt_queues(adapter, eqo, i) {
3397                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3398                 vec = be_msix_vec_get(adapter, eqo);
3399                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3400                 if (status)
3401                         goto err_msix;
3402
3403                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3404         }
3405
3406         return 0;
3407 err_msix:
3408         for (i--; i >= 0; i--) {
3409                 eqo = &adapter->eq_obj[i];
3410                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3411         }
3412         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3413                  status);
3414         be_msix_disable(adapter);
3415         return status;
3416 }
3417
3418 static int be_irq_register(struct be_adapter *adapter)
3419 {
3420         struct net_device *netdev = adapter->netdev;
3421         int status;
3422
3423         if (msix_enabled(adapter)) {
3424                 status = be_msix_register(adapter);
3425                 if (status == 0)
3426                         goto done;
3427                 /* INTx is not supported for VF */
3428                 if (be_virtfn(adapter))
3429                         return status;
3430         }
3431
3432         /* INTx: only the first EQ is used */
3433         netdev->irq = adapter->pdev->irq;
3434         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3435                              &adapter->eq_obj[0]);
3436         if (status) {
3437                 dev_err(&adapter->pdev->dev,
3438                         "INTx request IRQ failed - err %d\n", status);
3439                 return status;
3440         }
3441 done:
3442         adapter->isr_registered = true;
3443         return 0;
3444 }
3445
3446 static void be_irq_unregister(struct be_adapter *adapter)
3447 {
3448         struct net_device *netdev = adapter->netdev;
3449         struct be_eq_obj *eqo;
3450         int i, vec;
3451
3452         if (!adapter->isr_registered)
3453                 return;
3454
3455         /* INTx */
3456         if (!msix_enabled(adapter)) {
3457                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3458                 goto done;
3459         }
3460
3461         /* MSIx */
3462         for_all_evt_queues(adapter, eqo, i) {
3463                 vec = be_msix_vec_get(adapter, eqo);
3464                 irq_set_affinity_hint(vec, NULL);
3465                 free_irq(vec, eqo);
3466         }
3467
3468 done:
3469         adapter->isr_registered = false;
3470 }
3471
3472 static void be_rx_qs_destroy(struct be_adapter *adapter)
3473 {
3474         struct rss_info *rss = &adapter->rss_info;
3475         struct be_queue_info *q;
3476         struct be_rx_obj *rxo;
3477         int i;
3478
3479         for_all_rx_queues(adapter, rxo, i) {
3480                 q = &rxo->q;
3481                 if (q->created) {
3482                         /* If RXQs are destroyed while in an "out of buffer"
3483                          * state, there is a possibility of an HW stall on
3484                          * Lancer. So, post 64 buffers to each queue to relieve
3485                          * the "out of buffer" condition.
3486                          * Make sure there's space in the RXQ before posting.
3487                          */
3488                         if (lancer_chip(adapter)) {
3489                                 be_rx_cq_clean(rxo);
3490                                 if (atomic_read(&q->used) == 0)
3491                                         be_post_rx_frags(rxo, GFP_KERNEL,
3492                                                          MAX_RX_POST);
3493                         }
3494
3495                         be_cmd_rxq_destroy(adapter, q);
3496                         be_rx_cq_clean(rxo);
3497                         be_rxq_clean(rxo);
3498                 }
3499                 be_queue_free(adapter, q);
3500         }
3501
3502         if (rss->rss_flags) {
3503                 rss->rss_flags = RSS_ENABLE_NONE;
3504                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3505                                   128, rss->rss_hkey);
3506         }
3507 }
3508
3509 static void be_disable_if_filters(struct be_adapter *adapter)
3510 {
3511         be_cmd_pmac_del(adapter, adapter->if_handle,
3512                         adapter->pmac_id[0], 0);
3513
3514         be_clear_uc_list(adapter);
3515         be_clear_mc_list(adapter);
3516
3517         /* The IFACE flags are enabled in the open path and cleared
3518          * in the close path. When a VF gets detached from the host and
3519          * assigned to a VM the following happens:
3520          *      - VF's IFACE flags get cleared in the detach path
3521          *      - IFACE create is issued by the VF in the attach path
3522          * Due to a bug in the BE3/Skyhawk-R FW
3523          * (Lancer FW doesn't have the bug), the IFACE capability flags
3524          * specified along with the IFACE create cmd issued by a VF are not
3525          * honoured by FW.  As a consequence, if a *new* driver
3526          * (that enables/disables IFACE flags in open/close)
3527          * is loaded in the host and an *old* driver is used by a VM/VF,
3528          * the IFACE gets created *without* the needed flags.
3529          * To avoid this, disable RX-filter flags only for Lancer.
3530          */
3531         if (lancer_chip(adapter)) {
3532                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3533                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3534         }
3535 }
3536
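     /* Close path: disable RX filters, NAPI and async MCC events, drain TX,
      * destroy the RX queues and release the registered IRQs.
      */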
3537 static int be_close(struct net_device *netdev)
3538 {
3539         struct be_adapter *adapter = netdev_priv(netdev);
3540         struct be_eq_obj *eqo;
3541         int i;
3542
3543         /* This protection is needed as be_close() may be called even when the
3544          * adapter is in a cleared state (after an EEH permanent failure)
3545          */
3546         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3547                 return 0;
3548
3549         be_disable_if_filters(adapter);
3550
3551         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3552                 for_all_evt_queues(adapter, eqo, i) {
3553                         napi_disable(&eqo->napi);
3554                         be_disable_busy_poll(eqo);
3555                 }
3556                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3557         }
3558
3559         be_async_mcc_disable(adapter);
3560
3561         /* Wait for all pending tx completions to arrive so that
3562          * all tx skbs are freed.
3563          */
3564         netif_tx_disable(netdev);
3565         be_tx_compl_clean(adapter);
3566
3567         be_rx_qs_destroy(adapter);
3568
3569         for_all_evt_queues(adapter, eqo, i) {
3570                 if (msix_enabled(adapter))
3571                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3572                 else
3573                         synchronize_irq(netdev->irq);
3574                 be_eq_clean(eqo);
3575         }
3576
3577         be_irq_unregister(adapter);
3578
3579         return 0;
3580 }
3581
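     /* Create the RX rings in HW, program the RSS indirection table and hash
      * key when multiple RSS rings exist, and post the initial receive buffers.
      */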
3582 static int be_rx_qs_create(struct be_adapter *adapter)
3583 {
3584         struct rss_info *rss = &adapter->rss_info;
3585         u8 rss_key[RSS_HASH_KEY_LEN];
3586         struct be_rx_obj *rxo;
3587         int rc, i, j;
3588
3589         for_all_rx_queues(adapter, rxo, i) {
3590                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3591                                     sizeof(struct be_eth_rx_d));
3592                 if (rc)
3593                         return rc;
3594         }
3595
3596         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3597                 rxo = default_rxo(adapter);
3598                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3599                                        rx_frag_size, adapter->if_handle,
3600                                        false, &rxo->rss_id);
3601                 if (rc)
3602                         return rc;
3603         }
3604
3605         for_all_rss_queues(adapter, rxo, i) {
3606                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3607                                        rx_frag_size, adapter->if_handle,
3608                                        true, &rxo->rss_id);
3609                 if (rc)
3610                         return rc;
3611         }
3612
3613         if (be_multi_rxq(adapter)) {
3614                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3615                         for_all_rss_queues(adapter, rxo, i) {
3616                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3617                                         break;
3618                                 rss->rsstable[j + i] = rxo->rss_id;
3619                                 rss->rss_queue[j + i] = i;
3620                         }
3621                 }
3622                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3623                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3624
3625                 if (!BEx_chip(adapter))
3626                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3627                                 RSS_ENABLE_UDP_IPV6;
3628
3629                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3630                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3631                                        RSS_INDIR_TABLE_LEN, rss_key);
3632                 if (rc) {
3633                         rss->rss_flags = RSS_ENABLE_NONE;
3634                         return rc;
3635                 }
3636
3637                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3638         } else {
3639                 /* Disable RSS, if only default RX Q is created */
3640                 rss->rss_flags = RSS_ENABLE_NONE;
3641         }
3642
3643
3644         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3645          * which is a queue empty condition
3646          */
3647         for_all_rx_queues(adapter, rxo, i)
3648                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3649
3650         return 0;
3651 }
3652
3653 static int be_enable_if_filters(struct be_adapter *adapter)
3654 {
3655         int status;
3656
3657         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3658         if (status)
3659                 return status;
3660
3661         /* For BE3 VFs, the PF programs the initial MAC address */
3662         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3663                 status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
3664                                          adapter->if_handle,
3665                                          &adapter->pmac_id[0], 0);
3666                 if (status)
3667                         return status;
3668         }
3669
3670         if (adapter->vlans_added)
3671                 be_vid_config(adapter);
3672
3673         be_set_rx_mode(adapter->netdev);
3674
3675         return 0;
3676 }
3677
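     /* Open path: create RX queues, enable interface filters, register IRQs,
      * enable NAPI/busy-poll and async MCC events, then start the TX queues.
      */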
3678 static int be_open(struct net_device *netdev)
3679 {
3680         struct be_adapter *adapter = netdev_priv(netdev);
3681         struct be_eq_obj *eqo;
3682         struct be_rx_obj *rxo;
3683         struct be_tx_obj *txo;
3684         u8 link_status;
3685         int status, i;
3686
3687         status = be_rx_qs_create(adapter);
3688         if (status)
3689                 goto err;
3690
3691         status = be_enable_if_filters(adapter);
3692         if (status)
3693                 goto err;
3694
3695         status = be_irq_register(adapter);
3696         if (status)
3697                 goto err;
3698
3699         for_all_rx_queues(adapter, rxo, i)
3700                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3701
3702         for_all_tx_queues(adapter, txo, i)
3703                 be_cq_notify(adapter, txo->cq.id, true, 0);
3704
3705         be_async_mcc_enable(adapter);
3706
3707         for_all_evt_queues(adapter, eqo, i) {
3708                 napi_enable(&eqo->napi);
3709                 be_enable_busy_poll(eqo);
3710                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3711         }
3712         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3713
3714         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3715         if (!status)
3716                 be_link_status_update(adapter, link_status);
3717
3718         netif_tx_start_all_queues(netdev);
3719         if (skyhawk_chip(adapter))
3720                 udp_tunnel_get_rx_info(netdev);
3721
3722         return 0;
3723 err:
3724         be_close(adapter->netdev);
3725         return -EIO;
3726 }
3727
3728 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3729 {
3730         u32 addr;
3731
3732         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3733
3734         mac[5] = (u8)(addr & 0xFF);
3735         mac[4] = (u8)((addr >> 8) & 0xFF);
3736         mac[3] = (u8)((addr >> 16) & 0xFF);
3737         /* Use the OUI from the current MAC address */
3738         memcpy(mac, adapter->netdev->dev_addr, 3);
3739 }
3740
3741 /*
3742  * Generate a seed MAC address from the PF MAC Address using jhash.
3743  * MAC addresses for the VFs are assigned incrementally starting from the seed.
3744  * These addresses are programmed in the ASIC by the PF and the VF driver
3745  * queries for the MAC address during its probe.
3746  */
3747 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3748 {
3749         u32 vf;
3750         int status = 0;
3751         u8 mac[ETH_ALEN];
3752         struct be_vf_cfg *vf_cfg;
3753
3754         be_vf_eth_addr_generate(adapter, mac);
3755
3756         for_all_vfs(adapter, vf_cfg, vf) {
3757                 if (BEx_chip(adapter))
3758                         status = be_cmd_pmac_add(adapter, mac,
3759                                                  vf_cfg->if_handle,
3760                                                  &vf_cfg->pmac_id, vf + 1);
3761                 else
3762                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3763                                                 vf + 1);
3764
3765                 if (status)
3766                         dev_err(&adapter->pdev->dev,
3767                                 "MAC address assignment failed for VF %d\n",
3768                                 vf);
3769                 else
3770                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3771
3772                 mac[5] += 1;
3773         }
3774         return status;
3775 }
3776
3777 static int be_vfs_mac_query(struct be_adapter *adapter)
3778 {
3779         int status, vf;
3780         u8 mac[ETH_ALEN];
3781         struct be_vf_cfg *vf_cfg;
3782
3783         for_all_vfs(adapter, vf_cfg, vf) {
3784                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3785                                                mac, vf_cfg->if_handle,
3786                                                false, vf+1);
3787                 if (status)
3788                         return status;
3789                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3790         }
3791         return 0;
3792 }
3793
3794 static void be_vf_clear(struct be_adapter *adapter)
3795 {
3796         struct be_vf_cfg *vf_cfg;
3797         u32 vf;
3798
3799         if (pci_vfs_assigned(adapter->pdev)) {
3800                 dev_warn(&adapter->pdev->dev,
3801                          "VFs are assigned to VMs: not disabling VFs\n");
3802                 goto done;
3803         }
3804
3805         pci_disable_sriov(adapter->pdev);
3806
3807         for_all_vfs(adapter, vf_cfg, vf) {
3808                 if (BEx_chip(adapter))
3809                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3810                                         vf_cfg->pmac_id, vf + 1);
3811                 else
3812                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3813                                        vf + 1);
3814
3815                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3816         }
3817
3818         if (BE3_chip(adapter))
3819                 be_cmd_set_hsw_config(adapter, 0, 0,
3820                                       adapter->if_handle,
3821                                       PORT_FWD_TYPE_PASSTHRU, 0);
3822 done:
3823         kfree(adapter->vf_cfg);
3824         adapter->num_vfs = 0;
3825         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3826 }
3827
3828 static void be_clear_queues(struct be_adapter *adapter)
3829 {
3830         be_mcc_queues_destroy(adapter);
3831         be_rx_cqs_destroy(adapter);
3832         be_tx_queues_destroy(adapter);
3833         be_evt_queues_destroy(adapter);
3834 }
3835
3836 static void be_cancel_worker(struct be_adapter *adapter)
3837 {
3838         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3839                 cancel_delayed_work_sync(&adapter->work);
3840                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3841         }
3842 }
3843
3844 static void be_cancel_err_detection(struct be_adapter *adapter)
3845 {
3846         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3847                 cancel_delayed_work_sync(&adapter->be_err_detection_work);
3848                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3849         }
3850 }
3851
3852 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3853 {
3854         struct net_device *netdev = adapter->netdev;
3855
3856         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3857                 be_cmd_manage_iface(adapter, adapter->if_handle,
3858                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3859
3860         if (adapter->vxlan_port)
3861                 be_cmd_set_vxlan_port(adapter, 0);
3862
3863         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3864         adapter->vxlan_port = 0;
3865
3866         netdev->hw_enc_features = 0;
3867         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3868         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3869 }
3870
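     /* Work out the per-VF share of queue and filter resources when the PF
      * pool is split among num_vfs VFs and the PF itself; GET_PROFILE_CONFIG
      * is used to learn which IFACE flags and filter counts are modifiable.
      */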
3871 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3872                                 struct be_resources *vft_res)
3873 {
3874         struct be_resources res = adapter->pool_res;
3875         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3876         struct be_resources res_mod = {0};
3877         u16 num_vf_qs = 1;
3878
3879         /* Distribute the queue resources among the PF and its VFs */
3880         if (num_vfs) {
3881                 /* Divide the rx queues evenly among the VFs and the PF, capped
3882                  * at VF-EQ-count. Any remainder queues belong to the PF.
3883                  */
3884                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3885                                 res.max_rss_qs / (num_vfs + 1));
3886
3887                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3888                  * RSS Tables per port. Provide RSS on VFs only if the number of
3889                  * VFs requested is less than its PF Pool's RSS Tables limit.
3890                  */
3891                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3892                         num_vf_qs = 1;
3893         }
3894
3895         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3896          * which are modifiable using SET_PROFILE_CONFIG cmd.
3897          */
3898         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3899                                   RESOURCE_MODIFIABLE, 0);
3900
3901         /* If RSS IFACE capability flags are modifiable for a VF, set the
3902          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3903          * more than 1 RSSQ is available for a VF.
3904          * Otherwise, provision only 1 queue pair for VF.
3905          */
3906         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3907                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3908                 if (num_vf_qs > 1) {
3909                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3910                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3911                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3912                 } else {
3913                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3914                                              BE_IF_FLAGS_DEFQ_RSS);
3915                 }
3916         } else {
3917                 num_vf_qs = 1;
3918         }
3919
3920         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3921                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3922                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3923         }
3924
3925         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3926         vft_res->max_rx_qs = num_vf_qs;
3927         vft_res->max_rss_qs = num_vf_qs;
3928         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3929         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3930
3931         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3932          * among the PF and its VFs, if the fields are changeable
3933          */
3934         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3935                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3936
3937         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3938                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3939
3940         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3941                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3942
3943         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3944                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
3945 }
3946
3947 static int be_clear(struct be_adapter *adapter)
3948 {
3949         struct pci_dev *pdev = adapter->pdev;
3950         struct  be_resources vft_res = {0};
3951
3952         be_cancel_worker(adapter);
3953
3954         if (sriov_enabled(adapter))
3955                 be_vf_clear(adapter);
3956
3957         /* Re-configure FW to distribute resources evenly across max-supported
3958          * number of VFs, only when VFs are not already enabled.
3959          */
3960         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
3961             !pci_vfs_assigned(pdev)) {
3962                 be_calculate_vf_res(adapter,
3963                                     pci_sriov_get_totalvfs(pdev),
3964                                     &vft_res);
3965                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
3966                                         pci_sriov_get_totalvfs(pdev),
3967                                         &vft_res);
3968         }
3969
3970         be_disable_vxlan_offloads(adapter);
3971         kfree(adapter->pmac_id);
3972         adapter->pmac_id = NULL;
3973
3974         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
3975
3976         be_clear_queues(adapter);
3977
3978         be_msix_disable(adapter);
3979         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
3980         return 0;
3981 }
3982
3983 static int be_vfs_if_create(struct be_adapter *adapter)
3984 {
3985         struct be_resources res = {0};
3986         u32 cap_flags, en_flags, vf;
3987         struct be_vf_cfg *vf_cfg;
3988         int status;
3989
3990         /* If a FW profile exists, then cap_flags are updated */
3991         cap_flags = BE_VF_IF_EN_FLAGS;
3992
3993         for_all_vfs(adapter, vf_cfg, vf) {
3994                 if (!BE3_chip(adapter)) {
3995                         status = be_cmd_get_profile_config(adapter, &res, NULL,
3996                                                            ACTIVE_PROFILE_TYPE,
3997                                                            RESOURCE_LIMITS,
3998                                                            vf + 1);
3999                         if (!status) {
4000                                 cap_flags = res.if_cap_flags;
4001                                 /* Prevent VFs from enabling VLAN promiscuous
4002                                  * mode
4003                                  */
4004                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4005                         }
4006                 }
4007
4008                 /* PF should enable IF flags during proxy if_create call */
4009                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4010                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4011                                           &vf_cfg->if_handle, vf + 1);
4012                 if (status)
4013                         return status;
4014         }
4015
4016         return 0;
4017 }
4018
4019 static int be_vf_setup_init(struct be_adapter *adapter)
4020 {
4021         struct be_vf_cfg *vf_cfg;
4022         int vf;
4023
4024         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4025                                   GFP_KERNEL);
4026         if (!adapter->vf_cfg)
4027                 return -ENOMEM;
4028
4029         for_all_vfs(adapter, vf_cfg, vf) {
4030                 vf_cfg->if_handle = -1;
4031                 vf_cfg->pmac_id = -1;
4032         }
4033         return 0;
4034 }
4035
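     /* Bring up SR-IOV VFs: create (or, for pre-existing VFs, rediscover)
      * per-VF interfaces and MAC addresses, grant filter-management
      * privileges, and enable SR-IOV in the PCI layer if not already enabled.
      */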
4036 static int be_vf_setup(struct be_adapter *adapter)
4037 {
4038         struct device *dev = &adapter->pdev->dev;
4039         struct be_vf_cfg *vf_cfg;
4040         int status, old_vfs, vf;
4041         bool spoofchk;
4042
4043         old_vfs = pci_num_vf(adapter->pdev);
4044
4045         status = be_vf_setup_init(adapter);
4046         if (status)
4047                 goto err;
4048
4049         if (old_vfs) {
4050                 for_all_vfs(adapter, vf_cfg, vf) {
4051                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4052                         if (status)
4053                                 goto err;
4054                 }
4055
4056                 status = be_vfs_mac_query(adapter);
4057                 if (status)
4058                         goto err;
4059         } else {
4060                 status = be_vfs_if_create(adapter);
4061                 if (status)
4062                         goto err;
4063
4064                 status = be_vf_eth_addr_config(adapter);
4065                 if (status)
4066                         goto err;
4067         }
4068
4069         for_all_vfs(adapter, vf_cfg, vf) {
4070                 /* Allow VFs to program MAC/VLAN filters */
4071                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4072                                                   vf + 1);
4073                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4074                         status = be_cmd_set_fn_privileges(adapter,
4075                                                           vf_cfg->privileges |
4076                                                           BE_PRIV_FILTMGMT,
4077                                                           vf + 1);
4078                         if (!status) {
4079                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4080                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4081                                          vf);
4082                         }
4083                 }
4084
4085                 /* Allow full available bandwidth */
4086                 if (!old_vfs)
4087                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4088
4089                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4090                                                vf_cfg->if_handle, NULL,
4091                                                &spoofchk);
4092                 if (!status)
4093                         vf_cfg->spoofchk = spoofchk;
4094
4095                 if (!old_vfs) {
4096                         be_cmd_enable_vf(adapter, vf + 1);
4097                         be_cmd_set_logical_link_config(adapter,
4098                                                        IFLA_VF_LINK_STATE_AUTO,
4099                                                        vf+1);
4100                 }
4101         }
4102
4103         if (!old_vfs) {
4104                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4105                 if (status) {
4106                         dev_err(dev, "SRIOV enable failed\n");
4107                         adapter->num_vfs = 0;
4108                         goto err;
4109                 }
4110         }
4111
4112         if (BE3_chip(adapter)) {
4113                 /* On BE3, enable VEB only when SRIOV is enabled */
4114                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4115                                                adapter->if_handle,
4116                                                PORT_FWD_TYPE_VEB, 0);
4117                 if (status)
4118                         goto err;
4119         }
4120
4121         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4122         return 0;
4123 err:
4124         dev_err(dev, "VF setup failed\n");
4125         be_vf_clear(adapter);
4126         return status;
4127 }
4128
4129 /* Converting function_mode bits on BE3 to SH mc_type enums */
4130
4131 static u8 be_convert_mc_type(u32 function_mode)
4132 {
4133         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4134                 return vNIC1;
4135         else if (function_mode & QNQ_MODE)
4136                 return FLEX10;
4137         else if (function_mode & VNIC_MODE)
4138                 return vNIC2;
4139         else if (function_mode & UMC_ENABLED)
4140                 return UMC;
4141         else
4142                 return MC_NONE;
4143 }
4144
4145 /* On BE2/BE3 FW does not suggest the supported limits */
4146 static void BEx_get_resources(struct be_adapter *adapter,
4147                               struct be_resources *res)
4148 {
4149         bool use_sriov = adapter->num_vfs > 0;
4150
4151         if (be_physfn(adapter))
4152                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4153         else
4154                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4155
4156         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4157
4158         if (be_is_mc(adapter)) {
4159                 /* Assuming that there are 4 channels per port
4160                  * when multi-channel is enabled
4161                  */
4162                 if (be_is_qnq_mode(adapter))
4163                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4164                 else
4165                         /* In a non-qnq multichannel mode, the pvid
4166                          * takes up one vlan entry
4167                          */
4168                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4169         } else {
4170                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4171         }
4172
4173         res->max_mcast_mac = BE_MAX_MC;
4174
4175         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4176          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4177          *    *only* if it is RSS-capable.
4178          */
4179         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4180             be_virtfn(adapter) ||
4181             (be_is_mc(adapter) &&
4182              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4183                 res->max_tx_qs = 1;
4184         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4185                 struct be_resources super_nic_res = {0};
4186
4187                 /* On a SuperNIC profile, the driver needs to use the
4188                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4189                  */
4190                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4191                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4192                                           0);
4193                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4194                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4195         } else {
4196                 res->max_tx_qs = BE3_MAX_TX_QS;
4197         }
4198
4199         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4200             !use_sriov && be_physfn(adapter))
4201                 res->max_rss_qs = (adapter->be3_native) ?
4202                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4203         res->max_rx_qs = res->max_rss_qs + 1;
4204
4205         if (be_physfn(adapter))
4206                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4207                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4208         else
4209                 res->max_evt_qs = 1;
4210
4211         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4212         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4213         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4214                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4215 }
4216
4217 static void be_setup_init(struct be_adapter *adapter)
4218 {
4219         adapter->vlan_prio_bmap = 0xff;
4220         adapter->phy.link_speed = -1;
4221         adapter->if_handle = -1;
4222         adapter->be3_native = false;
4223         adapter->if_flags = 0;
4224         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4225         if (be_physfn(adapter))
4226                 adapter->cmd_privileges = MAX_PRIVILEGES;
4227         else
4228                 adapter->cmd_privileges = MIN_PRIVILEGES;
4229 }
4230
4231 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4232  * However, this HW limitation is not exposed to the host via any SLI cmd.
4233  * As a result, in the case of SRIOV and in particular multi-partition configs
4234  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4235  * for distribution between the VFs. This self-imposed limit will determine the
4236  * number of VFs for which RSS can be enabled.
4237  */
4238 void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4239 {
4240         struct be_port_resources port_res = {0};
4241         u8 rss_tables_on_port;
4242         u16 max_vfs = be_max_vfs(adapter);
4243
4244         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4245                                   RESOURCE_LIMITS, 0);
4246
4247         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4248
4249         /* Each PF Pool's RSS Tables limit =
4250          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4251          */
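        /* Worked example (hypothetical numbers): with 15 RSS tables left on
         * the port and this PF owning 32 of the port's 64 VFs, the pool
         * limit comes to 32 * 15 / 64 = 7 tables.
         */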
4252         adapter->pool_res.max_rss_tables =
4253                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4254 }
4255
4256 static int be_get_sriov_config(struct be_adapter *adapter)
4257 {
4258         struct be_resources res = {0};
4259         int max_vfs, old_vfs;
4260
4261         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4262                                   RESOURCE_LIMITS, 0);
4263
4264         /* Some old versions of BE3 FW don't report max_vfs value */
4265         if (BE3_chip(adapter) && !res.max_vfs) {
4266                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4267                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4268         }
4269
4270         adapter->pool_res = res;
4271
4272         /* If the VFs were not disabled during the previous unload of the driver,
4273          * then we cannot rely on the PF-pool limits for the TotalVFs value.
4274          * Instead, use the TotalVFs value stored in the pci-dev struct.
4275          */
4276         old_vfs = pci_num_vf(adapter->pdev);
4277         if (old_vfs) {
4278                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4279                          old_vfs);
4280
4281                 adapter->pool_res.max_vfs =
4282                         pci_sriov_get_totalvfs(adapter->pdev);
4283                 adapter->num_vfs = old_vfs;
4284         }
4285
4286         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4287                 be_calculate_pf_pool_rss_tables(adapter);
4288                 dev_info(&adapter->pdev->dev,
4289                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4290                          be_max_pf_pool_rss_tables(adapter));
4291         }
4292         return 0;
4293 }
4294
4295 static void be_alloc_sriov_res(struct be_adapter *adapter)
4296 {
4297         int old_vfs = pci_num_vf(adapter->pdev);
4298         struct be_resources vft_res = {0};
4299         int status;
4300
4301         be_get_sriov_config(adapter);
4302
4303         if (!old_vfs)
4304                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4305
4306         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4307          * resources are given to the PF during driver load, provided there
4308          * are no old VFs. This facility is not available in BE3 FW.
4309          * On the Lancer chip this is done by the FW itself.
4310          */
4311         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4312                 be_calculate_vf_res(adapter, 0, &vft_res);
4313                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4314                                                  &vft_res);
4315                 if (status)
4316                         dev_err(&adapter->pdev->dev,
4317                                 "Failed to optimize SRIOV resources\n");
4318         }
4319 }
4320
4321 static int be_get_resources(struct be_adapter *adapter)
4322 {
4323         struct device *dev = &adapter->pdev->dev;
4324         struct be_resources res = {0};
4325         int status;
4326
4327         /* For Lancer, SH, etc. read per-function resource limits from FW.
4328          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4329          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4330          */
4331         if (BEx_chip(adapter)) {
4332                 BEx_get_resources(adapter, &res);
4333         } else {
4334                 status = be_cmd_get_func_config(adapter, &res);
4335                 if (status)
4336                         return status;
4337
4338                 /* If a default RXQ must be created, we'll use up one RSSQ */
4339                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4340                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4341                         res.max_rss_qs -= 1;
4342         }
4343
4344         /* If RoCE is supported, stash away half the EQs for RoCE */
4345         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4346                                 res.max_evt_qs / 2 : res.max_evt_qs;
4347         adapter->res = res;
4348
4349         /* If FW supports the RSS default queue, then skip creating a non-RSS
4350          * queue for non-IP traffic.
4351          */
4352         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4353                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4354
4355         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4356                  be_max_txqs(adapter), be_max_rxqs(adapter),
4357                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4358                  be_max_vfs(adapter));
4359         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4360                  be_max_uc(adapter), be_max_mc(adapter),
4361                  be_max_vlans(adapter));
4362
4363         /* Ensure RX and TX queues are created in pairs at init time */
4364         adapter->cfg_num_rx_irqs =
4365                                 min_t(u16, netif_get_num_default_rss_queues(),
4366                                       be_max_qp_irqs(adapter));
4367         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4368         return 0;
4369 }
4370
4371 static int be_get_config(struct be_adapter *adapter)
4372 {
4373         int status, level;
4374         u16 profile_id;
4375
4376         status = be_cmd_get_cntl_attributes(adapter);
4377         if (status)
4378                 return status;
4379
4380         status = be_cmd_query_fw_cfg(adapter);
4381         if (status)
4382                 return status;
4383
4384         if (!lancer_chip(adapter) && be_physfn(adapter))
4385                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4386
4387         if (BEx_chip(adapter)) {
4388                 level = be_cmd_get_fw_log_level(adapter);
4389                 adapter->msg_enable =
4390                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4391         }
4392
4393         be_cmd_get_acpi_wol_cap(adapter);
4394         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4395         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4396
4397         be_cmd_query_port_name(adapter);
4398
4399         if (be_physfn(adapter)) {
4400                 status = be_cmd_get_active_profile(adapter, &profile_id);
4401                 if (!status)
4402                         dev_info(&adapter->pdev->dev,
4403                                  "Using profile 0x%x\n", profile_id);
4404         }
4405
4406         return 0;
4407 }
4408
4409 static int be_mac_setup(struct be_adapter *adapter)
4410 {
4411         u8 mac[ETH_ALEN];
4412         int status;
4413
4414         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4415                 status = be_cmd_get_perm_mac(adapter, mac);
4416                 if (status)
4417                         return status;
4418
4419                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4420                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4421         }
4422
4423         return 0;
4424 }
4425
4426 static void be_schedule_worker(struct be_adapter *adapter)
4427 {
4428         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
4429         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4430 }
4431
4432 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4433 {
4434         schedule_delayed_work(&adapter->be_err_detection_work,
4435                               msecs_to_jiffies(delay));
4436         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4437 }
4438
4439 static int be_setup_queues(struct be_adapter *adapter)
4440 {
4441         struct net_device *netdev = adapter->netdev;
4442         int status;
4443
4444         status = be_evt_queues_create(adapter);
4445         if (status)
4446                 goto err;
4447
4448         status = be_tx_qs_create(adapter);
4449         if (status)
4450                 goto err;
4451
4452         status = be_rx_cqs_create(adapter);
4453         if (status)
4454                 goto err;
4455
4456         status = be_mcc_queues_create(adapter);
4457         if (status)
4458                 goto err;
4459
4460         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4461         if (status)
4462                 goto err;
4463
4464         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4465         if (status)
4466                 goto err;
4467
4468         return 0;
4469 err:
4470         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4471         return status;
4472 }
4473
4474 static int be_if_create(struct be_adapter *adapter)
4475 {
4476         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4477         u32 cap_flags = be_if_cap_flags(adapter);
4478         int status;
4479
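        /* With a single RX interrupt there is nothing to spread flows
         * across, so don't request RSS capability for this interface.
         */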
4480         if (adapter->cfg_num_rx_irqs == 1)
4481                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4482
4483         en_flags &= cap_flags;
4484         /* will enable all the needed filter flags in be_open() */
4485         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4486                                   &adapter->if_handle, 0);
4487
4488         return status;
4489 }
4490
4491 int be_update_queues(struct be_adapter *adapter)
4492 {
4493         struct net_device *netdev = adapter->netdev;
4494         int status;
4495
4496         if (netif_running(netdev))
4497                 be_close(netdev);
4498
4499         be_cancel_worker(adapter);
4500
4501         /* If any vectors have been shared with RoCE, we cannot re-program
4502          * the MSIx table.
4503          */
4504         if (!adapter->num_msix_roce_vec)
4505                 be_msix_disable(adapter);
4506
4507         be_clear_queues(adapter);
4508         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4509         if (status)
4510                 return status;
4511
4512         if (!msix_enabled(adapter)) {
4513                 status = be_msix_enable(adapter);
4514                 if (status)
4515                         return status;
4516         }
4517
4518         status = be_if_create(adapter);
4519         if (status)
4520                 return status;
4521
4522         status = be_setup_queues(adapter);
4523         if (status)
4524                 return status;
4525
4526         be_schedule_worker(adapter);
4527
4528         if (netif_running(netdev))
4529                 status = be_open(netdev);
4530
4531         return status;
4532 }
4533
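/* Parse the leading major number out of a dotted FW version string
 * (e.g. "4.6.x.y" -- version format shown for illustration only);
 * returns 0 if the string cannot be parsed.
 */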
4534 static inline int fw_major_num(const char *fw_ver)
4535 {
4536         int fw_major = 0, i;
4537
4538         i = sscanf(fw_ver, "%d.", &fw_major);
4539         if (i != 1)
4540                 return 0;
4541
4542         return fw_major;
4543 }
4544
4545 /* If any VFs are already enabled, don't FLR the PF */
4546 static bool be_reset_required(struct be_adapter *adapter)
4547 {
4548         return pci_num_vf(adapter->pdev) ? false : true;
4549 }
4550
4551 /* Wait for the FW to be ready and perform the required initialization */
4552 static int be_func_init(struct be_adapter *adapter)
4553 {
4554         int status;
4555
4556         status = be_fw_wait_ready(adapter);
4557         if (status)
4558                 return status;
4559
4560         if (be_reset_required(adapter)) {
4561                 status = be_cmd_reset_function(adapter);
4562                 if (status)
4563                         return status;
4564
4565                 /* Wait for interrupts to quiesce after an FLR */
4566                 msleep(100);
4567
4568                 /* We can clear all errors when function reset succeeds */
4569                 be_clear_error(adapter, BE_CLEAR_ALL);
4570         }
4571
4572         /* Tell FW we're ready to fire cmds */
4573         status = be_cmd_fw_init(adapter);
4574         if (status)
4575                 return status;
4576
4577         /* Allow interrupts for other ULPs running on NIC function */
4578         be_intr_set(adapter, true);
4579
4580         return 0;
4581 }
4582
4583 static int be_setup(struct be_adapter *adapter)
4584 {
4585         struct device *dev = &adapter->pdev->dev;
4586         int status;
4587
4588         status = be_func_init(adapter);
4589         if (status)
4590                 return status;
4591
4592         be_setup_init(adapter);
4593
4594         if (!lancer_chip(adapter))
4595                 be_cmd_req_native_mode(adapter);
4596
4597         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4598          * for issuing profile-related cmds
4599          */
4600         if (!BEx_chip(adapter)) {
4601                 status = be_cmd_get_func_config(adapter, NULL);
4602                 if (status)
4603                         return status;
4604         }
4605
4606         status = be_get_config(adapter);
4607         if (status)
4608                 goto err;
4609
4610         if (!BE2_chip(adapter) && be_physfn(adapter))
4611                 be_alloc_sriov_res(adapter);
4612
4613         status = be_get_resources(adapter);
4614         if (status)
4615                 goto err;
4616
4617         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4618                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4619         if (!adapter->pmac_id)
4620                 return -ENOMEM;
4621
4622         status = be_msix_enable(adapter);
4623         if (status)
4624                 goto err;
4625
4626         /* will enable all the needed filter flags in be_open() */
4627         status = be_if_create(adapter);
4628         if (status)
4629                 goto err;
4630
4631         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4632         rtnl_lock();
4633         status = be_setup_queues(adapter);
4634         rtnl_unlock();
4635         if (status)
4636                 goto err;
4637
4638         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4639
4640         status = be_mac_setup(adapter);
4641         if (status)
4642                 goto err;
4643
4644         be_cmd_get_fw_ver(adapter);
4645         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4646
4647         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4648                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4649                         adapter->fw_ver);
4650                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4651         }
4652
4653         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4654                                          adapter->rx_fc);
4655         if (status)
4656                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4657                                         &adapter->rx_fc);
4658
4659         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4660                  adapter->tx_fc, adapter->rx_fc);
4661
4662         if (be_physfn(adapter))
4663                 be_cmd_set_logical_link_config(adapter,
4664                                                IFLA_VF_LINK_STATE_AUTO, 0);
4665
4666         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4667          * confusing any Linux bridge or OVS that it might be connected to.
4668          * Set the EVB to PASSTHRU mode, which effectively disables the EVB
4669          * when SRIOV is not enabled.
4670          */
4671         if (BE3_chip(adapter))
4672                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4673                                       PORT_FWD_TYPE_PASSTHRU, 0);
4674
4675         if (adapter->num_vfs)
4676                 be_vf_setup(adapter);
4677
4678         status = be_cmd_get_phy_info(adapter);
4679         if (!status && be_pause_supported(adapter))
4680                 adapter->phy.fc_autoneg = 1;
4681
4682         be_schedule_worker(adapter);
4683         adapter->flags |= BE_FLAGS_SETUP_DONE;
4684         return 0;
4685 err:
4686         be_clear(adapter);
4687         return status;
4688 }
4689
4690 #ifdef CONFIG_NET_POLL_CONTROLLER
4691 static void be_netpoll(struct net_device *netdev)
4692 {
4693         struct be_adapter *adapter = netdev_priv(netdev);
4694         struct be_eq_obj *eqo;
4695         int i;
4696
4697         for_all_evt_queues(adapter, eqo, i) {
4698                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4699                 napi_schedule(&eqo->napi);
4700         }
4701 }
4702 #endif
4703
4704 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4705 {
4706         const struct firmware *fw;
4707         int status;
4708
4709         if (!netif_running(adapter->netdev)) {
4710                 dev_err(&adapter->pdev->dev,
4711                         "Firmware load not allowed (interface is down)\n");
4712                 return -ENETDOWN;
4713         }
4714
4715         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4716         if (status)
4717                 goto fw_exit;
4718
4719         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4720
4721         if (lancer_chip(adapter))
4722                 status = lancer_fw_download(adapter, fw);
4723         else
4724                 status = be_fw_download(adapter, fw);
4725
4726         if (!status)
4727                 be_cmd_get_fw_ver(adapter);
4728
4729 fw_exit:
4730         release_firmware(fw);
4731         return status;
4732 }
4733
4734 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4735                                  u16 flags)
4736 {
4737         struct be_adapter *adapter = netdev_priv(dev);
4738         struct nlattr *attr, *br_spec;
4739         int rem;
4740         int status = 0;
4741         u16 mode = 0;
4742
4743         if (!sriov_enabled(adapter))
4744                 return -EOPNOTSUPP;
4745
4746         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4747         if (!br_spec)
4748                 return -EINVAL;
4749
4750         nla_for_each_nested(attr, br_spec, rem) {
4751                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4752                         continue;
4753
4754                 if (nla_len(attr) < sizeof(mode))
4755                         return -EINVAL;
4756
4757                 mode = nla_get_u16(attr);
4758                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4759                         return -EOPNOTSUPP;
4760
4761                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4762                         return -EINVAL;
4763
4764                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4765                                                adapter->if_handle,
4766                                                mode == BRIDGE_MODE_VEPA ?
4767                                                PORT_FWD_TYPE_VEPA :
4768                                                PORT_FWD_TYPE_VEB, 0);
4769                 if (status)
4770                         goto err;
4771
4772                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4773                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4774
4775                 return status;
4776         }
4777 err:
4778         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4779                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4780
4781         return status;
4782 }
4783
4784 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4785                                  struct net_device *dev, u32 filter_mask,
4786                                  int nlflags)
4787 {
4788         struct be_adapter *adapter = netdev_priv(dev);
4789         int status = 0;
4790         u8 hsw_mode;
4791
4792         /* BE and Lancer chips support VEB mode only */
4793         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4794                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4795                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4796                         return 0;
4797                 hsw_mode = PORT_FWD_TYPE_VEB;
4798         } else {
4799                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4800                                                adapter->if_handle, &hsw_mode,
4801                                                NULL);
4802                 if (status)
4803                         return 0;
4804
4805                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4806                         return 0;
4807         }
4808
4809         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4810                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4811                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4812                                        0, 0, nlflags, filter_mask, NULL);
4813 }
4814
4815 /* VxLAN offload Notes:
4816  *
4817  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4818  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4819  * is expected to work across all types of IP tunnels once exported. Skyhawk
4820  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4821  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4822  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4823  * those other tunnels are unexported on the fly through ndo_features_check().
4824  *
4825  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4826  * adds more than one port, disable offloads and don't re-enable them again
4827  * until after all the tunnels are removed.
4828  */
4829 static void be_add_vxlan_port(struct net_device *netdev,
4830                               struct udp_tunnel_info *ti)
4831 {
4832         struct be_adapter *adapter = netdev_priv(netdev);
4833         struct device *dev = &adapter->pdev->dev;
4834         __be16 port = ti->port;
4835         int status;
4836
4837         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4838                 return;
4839
4840         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4841                 return;
4842
4843         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4844                 adapter->vxlan_port_aliases++;
4845                 return;
4846         }
4847
4848         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4849                 dev_info(dev,
4850                          "Only one UDP port supported for VxLAN offloads\n");
4851                 dev_info(dev, "Disabling VxLAN offloads\n");
4852                 adapter->vxlan_port_count++;
4853                 goto err;
4854         }
4855
4856         if (adapter->vxlan_port_count++ >= 1)
4857                 return;
4858
4859         status = be_cmd_manage_iface(adapter, adapter->if_handle,
4860                                      OP_CONVERT_NORMAL_TO_TUNNEL);
4861         if (status) {
4862                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4863                 goto err;
4864         }
4865
4866         status = be_cmd_set_vxlan_port(adapter, port);
4867         if (status) {
4868                 dev_warn(dev, "Failed to add VxLAN port\n");
4869                 goto err;
4870         }
4871         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4872         adapter->vxlan_port = port;
4873
4874         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4875                                    NETIF_F_TSO | NETIF_F_TSO6 |
4876                                    NETIF_F_GSO_UDP_TUNNEL;
4877         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4878         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4879
4880         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4881                  be16_to_cpu(port));
4882         return;
4883 err:
4884         be_disable_vxlan_offloads(adapter);
4885 }
4886
4887 static void be_del_vxlan_port(struct net_device *netdev,
4888                               struct udp_tunnel_info *ti)
4889 {
4890         struct be_adapter *adapter = netdev_priv(netdev);
4891         __be16 port = ti->port;
4892
4893         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4894                 return;
4895
4896         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4897                 return;
4898
4899         if (adapter->vxlan_port != port)
4900                 goto done;
4901
4902         if (adapter->vxlan_port_aliases) {
4903                 adapter->vxlan_port_aliases--;
4904                 return;
4905         }
4906
4907         be_disable_vxlan_offloads(adapter);
4908
4909         dev_info(&adapter->pdev->dev,
4910                  "Disabled VxLAN offloads for UDP port %d\n",
4911                  be16_to_cpu(port));
4912 done:
4913         adapter->vxlan_port_count--;
4914 }
4915
4916 static netdev_features_t be_features_check(struct sk_buff *skb,
4917                                            struct net_device *dev,
4918                                            netdev_features_t features)
4919 {
4920         struct be_adapter *adapter = netdev_priv(dev);
4921         u8 l4_hdr = 0;
4922
4923         /* The code below restricts offload features for some tunneled packets.
4924          * Offload features for normal (non-tunnel) packets are unchanged.
4925          */
4926         if (!skb->encapsulation ||
4927             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
4928                 return features;
4929
4930         /* It's an encapsulated packet and VxLAN offloads are enabled. We
4931          * should disable tunnel offload features if it's not a VxLAN packet,
4932          * as tunnel offloads have been enabled only for VxLAN. This is done to
4933          * allow other tunneled traffic, such as GRE, to work fine while VxLAN
4934          * offloads are configured in Skyhawk-R.
4935          */
4936         switch (vlan_get_protocol(skb)) {
4937         case htons(ETH_P_IP):
4938                 l4_hdr = ip_hdr(skb)->protocol;
4939                 break;
4940         case htons(ETH_P_IPV6):
4941                 l4_hdr = ipv6_hdr(skb)->nexthdr;
4942                 break;
4943         default:
4944                 return features;
4945         }
4946
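        /* Keep the offloads only if this looks like a VxLAN frame: an outer
         * UDP transport carrying an inner Ethernet (TEB) frame, separated by
         * exactly a UDP header plus a VxLAN header.
         */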
4947         if (l4_hdr != IPPROTO_UDP ||
4948             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
4949             skb->inner_protocol != htons(ETH_P_TEB) ||
4950             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
4951             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
4952                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4953
4954         return features;
4955 }
4956
4957 static int be_get_phys_port_id(struct net_device *dev,
4958                                struct netdev_phys_item_id *ppid)
4959 {
4960         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
4961         struct be_adapter *adapter = netdev_priv(dev);
4962         u8 *id;
4963
4964         if (MAX_PHYS_ITEM_ID_LEN < id_len)
4965                 return -ENOSPC;
4966
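        /* Build the ID as one byte of (HBA port number + 1) followed by the
         * controller serial number words copied in reverse order.
         */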
4967         ppid->id[0] = adapter->hba_port_num + 1;
4968         id = &ppid->id[1];
4969         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
4970              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
4971                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
4972
4973         ppid->id_len = id_len;
4974
4975         return 0;
4976 }
4977
4978 static const struct net_device_ops be_netdev_ops = {
4979         .ndo_open               = be_open,
4980         .ndo_stop               = be_close,
4981         .ndo_start_xmit         = be_xmit,
4982         .ndo_set_rx_mode        = be_set_rx_mode,
4983         .ndo_set_mac_address    = be_mac_addr_set,
4984         .ndo_change_mtu         = be_change_mtu,
4985         .ndo_get_stats64        = be_get_stats64,
4986         .ndo_validate_addr      = eth_validate_addr,
4987         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
4988         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
4989         .ndo_set_vf_mac         = be_set_vf_mac,
4990         .ndo_set_vf_vlan        = be_set_vf_vlan,
4991         .ndo_set_vf_rate        = be_set_vf_tx_rate,
4992         .ndo_get_vf_config      = be_get_vf_config,
4993         .ndo_set_vf_link_state  = be_set_vf_link_state,
4994         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
4995 #ifdef CONFIG_NET_POLL_CONTROLLER
4996         .ndo_poll_controller    = be_netpoll,
4997 #endif
4998         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
4999         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5000 #ifdef CONFIG_NET_RX_BUSY_POLL
5001         .ndo_busy_poll          = be_busy_poll,
5002 #endif
5003         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5004         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5005         .ndo_features_check     = be_features_check,
5006         .ndo_get_phys_port_id   = be_get_phys_port_id,
5007 };
5008
5009 static void be_netdev_init(struct net_device *netdev)
5010 {
5011         struct be_adapter *adapter = netdev_priv(netdev);
5012
5013         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5014                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5015                 NETIF_F_HW_VLAN_CTAG_TX;
5016         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5017                 netdev->hw_features |= NETIF_F_RXHASH;
5018
5019         netdev->features |= netdev->hw_features |
5020                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5021
5022         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5023                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5024
5025         netdev->priv_flags |= IFF_UNICAST_FLT;
5026
5027         netdev->flags |= IFF_MULTICAST;
5028
5029         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5030
5031         netdev->netdev_ops = &be_netdev_ops;
5032
5033         netdev->ethtool_ops = &be_ethtool_ops;
5034 }
5035
5036 static void be_cleanup(struct be_adapter *adapter)
5037 {
5038         struct net_device *netdev = adapter->netdev;
5039
5040         rtnl_lock();
5041         netif_device_detach(netdev);
5042         if (netif_running(netdev))
5043                 be_close(netdev);
5044         rtnl_unlock();
5045
5046         be_clear(adapter);
5047 }
5048
5049 static int be_resume(struct be_adapter *adapter)
5050 {
5051         struct net_device *netdev = adapter->netdev;
5052         int status;
5053
5054         status = be_setup(adapter);
5055         if (status)
5056                 return status;
5057
5058         rtnl_lock();
5059         if (netif_running(netdev))
5060                 status = be_open(netdev);
5061         rtnl_unlock();
5062
5063         if (status)
5064                 return status;
5065
5066         netif_device_attach(netdev);
5067
5068         return 0;
5069 }
5070
5071 static int be_err_recover(struct be_adapter *adapter)
5072 {
5073         int status;
5074
5075         /* Error recovery is supported only on Lancer as of now */
5076         if (!lancer_chip(adapter))
5077                 return -EIO;
5078
5079         /* Wait for adapter to reach quiescent state before
5080          * destroying queues
5081          */
5082         status = be_fw_wait_ready(adapter);
5083         if (status)
5084                 goto err;
5085
5086         be_cleanup(adapter);
5087
5088         status = be_resume(adapter);
5089         if (status)
5090                 goto err;
5091
5092         return 0;
5093 err:
5094         return status;
5095 }
5096
5097 static void be_err_detection_task(struct work_struct *work)
5098 {
5099         struct be_adapter *adapter =
5100                                 container_of(work, struct be_adapter,
5101                                              be_err_detection_work.work);
5102         struct device *dev = &adapter->pdev->dev;
5103         int recovery_status;
5104         int delay = ERR_DETECTION_DELAY;
5105
5106         be_detect_error(adapter);
5107
5108         if (be_check_error(adapter, BE_ERROR_HW))
5109                 recovery_status = be_err_recover(adapter);
5110         else
5111                 goto reschedule_task;
5112
5113         if (!recovery_status) {
5114                 adapter->recovery_retries = 0;
5115                 dev_info(dev, "Adapter recovery successful\n");
5116                 goto reschedule_task;
5117         } else if (be_virtfn(adapter)) {
5118                 /* For VFs, check every second whether the PF has
5119                  * allocated resources.
5120                  */
5121                 dev_err(dev, "Re-trying adapter recovery\n");
5122                 goto reschedule_task;
5123         } else if (adapter->recovery_retries++ <
5124                    MAX_ERR_RECOVERY_RETRY_COUNT) {
5125                 /* In case of another error during recovery, it takes 30 sec
5126                  * for the adapter to come out of the error state. Retry error
5127                  * recovery after this time interval.
5128                  */
5129                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5130                 delay = ERR_RECOVERY_RETRY_DELAY;
5131                 goto reschedule_task;
5132         } else {
5133                 dev_err(dev, "Adapter recovery failed\n");
5134         }
5135
5136         return;
5137 reschedule_task:
5138         be_schedule_err_detection(adapter, delay);
5139 }
5140
5141 static void be_log_sfp_info(struct be_adapter *adapter)
5142 {
5143         int status;
5144
5145         status = be_cmd_query_sfp_info(adapter);
5146         if (!status) {
5147                 dev_err(&adapter->pdev->dev,
5148                         "Port %c: %s Vendor: %s part no: %s",
5149                         adapter->port_name,
5150                         be_misconfig_evt_port_state[adapter->phy_state],
5151                         adapter->phy.vendor_name,
5152                         adapter->phy.vendor_pn);
5153         }
5154         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5155 }
5156
5157 static void be_worker(struct work_struct *work)
5158 {
5159         struct be_adapter *adapter =
5160                 container_of(work, struct be_adapter, work.work);
5161         struct be_rx_obj *rxo;
5162         int i;
5163
5164         if (be_physfn(adapter) &&
5165             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5166                 be_cmd_get_die_temperature(adapter);
5167
5168         /* when interrupts are not yet enabled, just reap any pending
5169          * mcc completions
5170          */
5171         if (!netif_running(adapter->netdev)) {
5172                 local_bh_disable();
5173                 be_process_mcc(adapter);
5174                 local_bh_enable();
5175                 goto reschedule;
5176         }
5177
5178         if (!adapter->stats_cmd_sent) {
5179                 if (lancer_chip(adapter))
5180                         lancer_cmd_get_pport_stats(adapter,
5181                                                    &adapter->stats_cmd);
5182                 else
5183                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5184         }
5185
5186         for_all_rx_queues(adapter, rxo, i) {
5187                 /* Replenish RX-queues starved due to memory
5188                  * allocation failures.
5189                  */
5190                 if (rxo->rx_post_starved)
5191                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5192         }
5193
5194         /* EQ-delay update for Skyhawk is done while notifying EQ */
5195         if (!skyhawk_chip(adapter))
5196                 be_eqd_update(adapter, false);
5197
5198         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5199                 be_log_sfp_info(adapter);
5200
5201 reschedule:
5202         adapter->work_counter++;
5203         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
5204 }
5205
5206 static void be_unmap_pci_bars(struct be_adapter *adapter)
5207 {
5208         if (adapter->csr)
5209                 pci_iounmap(adapter->pdev, adapter->csr);
5210         if (adapter->db)
5211                 pci_iounmap(adapter->pdev, adapter->db);
5212         if (adapter->pcicfg && adapter->pcicfg_mapped)
5213                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5214 }
5215
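/* The doorbell (db) BAR is BAR 0 on Lancer and on VFs, and BAR 4 on all
 * other functions.
 */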
5216 static int db_bar(struct be_adapter *adapter)
5217 {
5218         if (lancer_chip(adapter) || be_virtfn(adapter))
5219                 return 0;
5220         else
5221                 return 4;
5222 }
5223
5224 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5225 {
5226         if (skyhawk_chip(adapter)) {
5227                 adapter->roce_db.size = 4096;
5228                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5229                                                               db_bar(adapter));
5230                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5231                                                                db_bar(adapter));
5232         }
5233         return 0;
5234 }
5235
5236 static int be_map_pci_bars(struct be_adapter *adapter)
5237 {
5238         struct pci_dev *pdev = adapter->pdev;
5239         u8 __iomem *addr;
5240         u32 sli_intf;
5241
5242         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5243         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5244                                 SLI_INTF_FAMILY_SHIFT;
5245         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5246
5247         if (BEx_chip(adapter) && be_physfn(adapter)) {
5248                 adapter->csr = pci_iomap(pdev, 2, 0);
5249                 if (!adapter->csr)
5250                         return -ENOMEM;
5251         }
5252
5253         addr = pci_iomap(pdev, db_bar(adapter), 0);
5254         if (!addr)
5255                 goto pci_map_err;
5256         adapter->db = addr;
5257
5258         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5259                 if (be_physfn(adapter)) {
5260                         /* PCICFG is the 2nd BAR in BE2 */
5261                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5262                         if (!addr)
5263                                 goto pci_map_err;
5264                         adapter->pcicfg = addr;
5265                         adapter->pcicfg_mapped = true;
5266                 } else {
5267                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5268                         adapter->pcicfg_mapped = false;
5269                 }
5270         }
5271
5272         be_roce_map_pci_bars(adapter);
5273         return 0;
5274
5275 pci_map_err:
5276         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5277         be_unmap_pci_bars(adapter);
5278         return -ENOMEM;
5279 }
5280
5281 static void be_drv_cleanup(struct be_adapter *adapter)
5282 {
5283         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5284         struct device *dev = &adapter->pdev->dev;
5285
5286         if (mem->va)
5287                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5288
5289         mem = &adapter->rx_filter;
5290         if (mem->va)
5291                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5292
5293         mem = &adapter->stats_cmd;
5294         if (mem->va)
5295                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5296 }
5297
5298 /* Allocate and initialize various fields in be_adapter struct */
5299 static int be_drv_init(struct be_adapter *adapter)
5300 {
5301         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5302         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5303         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5304         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5305         struct device *dev = &adapter->pdev->dev;
5306         int status = 0;
5307
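        /* Over-allocate the mailbox by 16 bytes so that the copy actually
         * used (mbox_mem_align, set up below) can be 16-byte aligned.
         */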
5308         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5309         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5310                                                  &mbox_mem_alloc->dma,
5311                                                  GFP_KERNEL);
5312         if (!mbox_mem_alloc->va)
5313                 return -ENOMEM;
5314
5315         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5316         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5317         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5318
5319         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5320         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5321                                             &rx_filter->dma, GFP_KERNEL);
5322         if (!rx_filter->va) {
5323                 status = -ENOMEM;
5324                 goto free_mbox;
5325         }
5326
5327         if (lancer_chip(adapter))
5328                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5329         else if (BE2_chip(adapter))
5330                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5331         else if (BE3_chip(adapter))
5332                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5333         else
5334                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5335         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5336                                             &stats_cmd->dma, GFP_KERNEL);
5337         if (!stats_cmd->va) {
5338                 status = -ENOMEM;
5339                 goto free_rx_filter;
5340         }
5341
5342         mutex_init(&adapter->mbox_lock);
5343         spin_lock_init(&adapter->mcc_lock);
5344         spin_lock_init(&adapter->mcc_cq_lock);
5345         init_completion(&adapter->et_cmd_compl);
5346
5347         pci_save_state(adapter->pdev);
5348
5349         INIT_DELAYED_WORK(&adapter->work, be_worker);
5350         INIT_DELAYED_WORK(&adapter->be_err_detection_work,
5351                           be_err_detection_task);
5352
5353         adapter->rx_fc = true;
5354         adapter->tx_fc = true;
5355
5356         /* Must be a power of 2 or else MODULO will BUG_ON */
5357         adapter->be_get_temp_freq = 64;
5358
5359         return 0;
5360
5361 free_rx_filter:
5362         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5363 free_mbox:
5364         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5365                           mbox_mem_alloc->dma);
5366         return status;
5367 }
5368
5369 static void be_remove(struct pci_dev *pdev)
5370 {
5371         struct be_adapter *adapter = pci_get_drvdata(pdev);
5372
5373         if (!adapter)
5374                 return;
5375
5376         be_roce_dev_remove(adapter);
5377         be_intr_set(adapter, false);
5378
5379         be_cancel_err_detection(adapter);
5380
5381         unregister_netdev(adapter->netdev);
5382
5383         be_clear(adapter);
5384
5385         /* tell fw we're done with firing cmds */
5386         be_cmd_fw_clean(adapter);
5387
5388         be_unmap_pci_bars(adapter);
5389         be_drv_cleanup(adapter);
5390
5391         pci_disable_pcie_error_reporting(pdev);
5392
5393         pci_release_regions(pdev);
5394         pci_disable_device(pdev);
5395
5396         free_netdev(adapter->netdev);
5397 }
5398
5399 static ssize_t be_hwmon_show_temp(struct device *dev,
5400                                   struct device_attribute *dev_attr,
5401                                   char *buf)
5402 {
5403         struct be_adapter *adapter = dev_get_drvdata(dev);
5404
5405         /* Unit: millidegree Celsius */
5406         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5407                 return -EIO;
5408         else
5409                 return sprintf(buf, "%u\n",
5410                                adapter->hwmon_info.be_on_die_temp * 1000);
5411 }
5412
5413 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5414                           be_hwmon_show_temp, NULL, 1);
5415
5416 static struct attribute *be_hwmon_attrs[] = {
5417         &sensor_dev_attr_temp1_input.dev_attr.attr,
5418         NULL
5419 };
5420
5421 ATTRIBUTE_GROUPS(be_hwmon);
5422
5423 static char *mc_name(struct be_adapter *adapter)
5424 {
5425         char *str = ""; /* default */
5426
5427         switch (adapter->mc_type) {
5428         case UMC:
5429                 str = "UMC";
5430                 break;
5431         case FLEX10:
5432                 str = "FLEX10";
5433                 break;
5434         case vNIC1:
5435                 str = "vNIC-1";
5436                 break;
5437         case nPAR:
5438                 str = "nPAR";
5439                 break;
5440         case UFP:
5441                 str = "UFP";
5442                 break;
5443         case vNIC2:
5444                 str = "vNIC-2";
5445                 break;
5446         default:
5447                 str = "";
5448         }
5449
5450         return str;
5451 }
5452
5453 static inline char *func_name(struct be_adapter *adapter)
5454 {
5455         return be_physfn(adapter) ? "PF" : "VF";
5456 }
5457
5458 static inline char *nic_name(struct pci_dev *pdev)
5459 {
5460         switch (pdev->device) {
5461         case OC_DEVICE_ID1:
5462                 return OC_NAME;
5463         case OC_DEVICE_ID2:
5464                 return OC_NAME_BE;
5465         case OC_DEVICE_ID3:
5466         case OC_DEVICE_ID4:
5467                 return OC_NAME_LANCER;
5468         case BE_DEVICE_ID2:
5469                 return BE3_NAME;
5470         case OC_DEVICE_ID5:
5471         case OC_DEVICE_ID6:
5472                 return OC_NAME_SH;
5473         default:
5474                 return BE_NAME;
5475         }
5476 }
5477
5478 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5479 {
5480         struct be_adapter *adapter;
5481         struct net_device *netdev;
5482         int status = 0;
5483
5484         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5485
5486         status = pci_enable_device(pdev);
5487         if (status)
5488                 goto do_none;
5489
5490         status = pci_request_regions(pdev, DRV_NAME);
5491         if (status)
5492                 goto disable_dev;
5493         pci_set_master(pdev);
5494
5495         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5496         if (!netdev) {
5497                 status = -ENOMEM;
5498                 goto rel_reg;
5499         }
5500         adapter = netdev_priv(netdev);
5501         adapter->pdev = pdev;
5502         pci_set_drvdata(pdev, adapter);
5503         adapter->netdev = netdev;
5504         SET_NETDEV_DEV(netdev, &pdev->dev);
5505
5506         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5507         if (!status) {
5508                 netdev->features |= NETIF_F_HIGHDMA;
5509         } else {
5510                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5511                 if (status) {
5512                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5513                         goto free_netdev;
5514                 }
5515         }
5516
5517         status = pci_enable_pcie_error_reporting(pdev);
5518         if (!status)
5519                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5520
5521         status = be_map_pci_bars(adapter);
5522         if (status)
5523                 goto free_netdev;
5524
5525         status = be_drv_init(adapter);
5526         if (status)
5527                 goto unmap_bars;
5528
5529         status = be_setup(adapter);
5530         if (status)
5531                 goto drv_cleanup;
5532
5533         be_netdev_init(netdev);
5534         status = register_netdev(netdev);
5535         if (status != 0)
5536                 goto unsetup;
5537
5538         be_roce_dev_add(adapter);
5539
5540         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5541
5542         /* On-die temperature is not supported on VFs. */
5543         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5544                 adapter->hwmon_info.hwmon_dev =
5545                         devm_hwmon_device_register_with_groups(&pdev->dev,
5546                                                                DRV_NAME,
5547                                                                adapter,
5548                                                                be_hwmon_groups);
5549                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5550         }
5551
5552         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5553                  func_name(adapter), mc_name(adapter), adapter->port_name);
5554
5555         return 0;
5556
5557 unsetup:
5558         be_clear(adapter);
5559 drv_cleanup:
5560         be_drv_cleanup(adapter);
5561 unmap_bars:
5562         be_unmap_pci_bars(adapter);
5563 free_netdev:
5564         free_netdev(netdev);
5565 rel_reg:
5566         pci_release_regions(pdev);
5567 disable_dev:
5568         pci_disable_device(pdev);
5569 do_none:
5570         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5571         return status;
5572 }
5573
5574 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5575 {
5576         struct be_adapter *adapter = pci_get_drvdata(pdev);
5577
5578         be_intr_set(adapter, false);
5579         be_cancel_err_detection(adapter);
5580
5581         be_cleanup(adapter);
5582
5583         pci_save_state(pdev);
5584         pci_disable_device(pdev);
5585         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5586         return 0;
5587 }
5588
5589 static int be_pci_resume(struct pci_dev *pdev)
5590 {
5591         struct be_adapter *adapter = pci_get_drvdata(pdev);
5592         int status = 0;
5593
5594         status = pci_enable_device(pdev);
5595         if (status)
5596                 return status;
5597
5598         pci_restore_state(pdev);
5599
5600         status = be_resume(adapter);
5601         if (status)
5602                 return status;
5603
5604         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5605
5606         return 0;
5607 }
5608
5609 /*
5610  * An FLR will stop BE from DMAing any data.
5611  */
5612 static void be_shutdown(struct pci_dev *pdev)
5613 {
5614         struct be_adapter *adapter = pci_get_drvdata(pdev);
5615
5616         if (!adapter)
5617                 return;
5618
5619         be_roce_dev_shutdown(adapter);
5620         cancel_delayed_work_sync(&adapter->work);
5621         be_cancel_err_detection(adapter);
5622
5623         netif_device_detach(adapter->netdev);
5624
5625         be_cmd_reset_function(adapter);
5626
5627         pci_disable_device(pdev);
5628 }
5629
5630 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5631                                             pci_channel_state_t state)
5632 {
5633         struct be_adapter *adapter = pci_get_drvdata(pdev);
5634
5635         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5636
5637         be_roce_dev_remove(adapter);
5638
5639         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5640                 be_set_error(adapter, BE_ERROR_EEH);
5641
5642                 be_cancel_err_detection(adapter);
5643
5644                 be_cleanup(adapter);
5645         }
5646
5647         if (state == pci_channel_io_perm_failure)
5648                 return PCI_ERS_RESULT_DISCONNECT;
5649
5650         pci_disable_device(pdev);
5651
5652         /* The error could cause the FW to trigger a flash debug dump.
5653          * Resetting the card while flash dump is in progress
5654          * can cause it not to recover; wait for it to finish.
5655          * Wait only for the first function, as it is needed only once per
5656          * adapter.
5657          */
5658         if (pdev->devfn == 0)
5659                 ssleep(30);
5660
5661         return PCI_ERS_RESULT_NEED_RESET;
5662 }
5663
5664 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5665 {
5666         struct be_adapter *adapter = pci_get_drvdata(pdev);
5667         int status;
5668
5669         dev_info(&adapter->pdev->dev, "EEH reset\n");
5670
5671         status = pci_enable_device(pdev);
5672         if (status)
5673                 return PCI_ERS_RESULT_DISCONNECT;
5674
5675         pci_set_master(pdev);
5676         pci_restore_state(pdev);
5677
5678         /* Check if card is ok and fw is ready */
5679         dev_info(&adapter->pdev->dev,
5680                  "Waiting for FW to be ready after EEH reset\n");
5681         status = be_fw_wait_ready(adapter);
5682         if (status)
5683                 return PCI_ERS_RESULT_DISCONNECT;
5684
5685         pci_cleanup_aer_uncorrect_error_status(pdev);
5686         be_clear_error(adapter, BE_CLEAR_ALL);
5687         return PCI_ERS_RESULT_RECOVERED;
5688 }
5689
5690 static void be_eeh_resume(struct pci_dev *pdev)
5691 {
5692         int status = 0;
5693         struct be_adapter *adapter = pci_get_drvdata(pdev);
5694
5695         dev_info(&adapter->pdev->dev, "EEH resume\n");
5696
5697         pci_save_state(pdev);
5698
5699         status = be_resume(adapter);
5700         if (status)
5701                 goto err;
5702
5703         be_roce_dev_add(adapter);
5704
5705         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5706         return;
5707 err:
5708         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
5709 }
5710
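/* Invoked via the standard PCI sysfs interface, e.g. (path shown for
 * illustration only):
 *     echo 4 > /sys/bus/pci/devices/<domain:bus:dev.fn>/sriov_numvfs
 */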
5711 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
5712 {
5713         struct be_adapter *adapter = pci_get_drvdata(pdev);
5714         struct be_resources vft_res = {0};
5715         int status;
5716
5717         if (!num_vfs)
5718                 be_vf_clear(adapter);
5719
5720         adapter->num_vfs = num_vfs;
5721
5722         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
5723                 dev_warn(&pdev->dev,
5724                          "Cannot disable VFs while they are assigned\n");
5725                 return -EBUSY;
5726         }
5727
5728         /* When the HW is in an SRIOV-capable configuration, the PF-pool resources
5729          * are equally distributed across the max number of VFs. The user may
5730          * request that only a subset of the max VFs be enabled.
5731          * Based on num_vfs, redistribute the resources across num_vfs so that
5732          * each VF gets access to a larger share of resources.
5733          * This facility is not available in BE3 FW.
5734          * On the Lancer chip this is done by the FW itself.
5735          */
5736         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
5737                 be_calculate_vf_res(adapter, adapter->num_vfs,
5738                                     &vft_res);
5739                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
5740                                                  adapter->num_vfs, &vft_res);
5741                 if (status)
5742                         dev_err(&pdev->dev,
5743                                 "Failed to optimize SR-IOV resources\n");
5744         }
5745
5746         status = be_get_resources(adapter);
5747         if (status)
5748                 return be_cmd_status(status);
5749
5750         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
5751         rtnl_lock();
5752         status = be_update_queues(adapter);
5753         rtnl_unlock();
5754         if (status)
5755                 return be_cmd_status(status);
5756
5757         if (adapter->num_vfs)
5758                 status = be_vf_setup(adapter);
5759
5760         if (!status)
5761                 return adapter->num_vfs;
5762
5763         return 0;
5764 }
5765
5766 static const struct pci_error_handlers be_eeh_handlers = {
5767         .error_detected = be_eeh_err_detected,
5768         .slot_reset = be_eeh_reset,
5769         .resume = be_eeh_resume,
5770 };
5771
5772 static struct pci_driver be_driver = {
5773         .name = DRV_NAME,
5774         .id_table = be_dev_ids,
5775         .probe = be_probe,
5776         .remove = be_remove,
5777         .suspend = be_suspend,
5778         .resume = be_pci_resume,
5779         .shutdown = be_shutdown,
5780         .sriov_configure = be_pci_sriov_configure,
5781         .err_handler = &be_eeh_handlers
5782 };
5783
5784 static int __init be_init_module(void)
5785 {
5786         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
5787             rx_frag_size != 2048) {
5788                 printk(KERN_WARNING DRV_NAME
5789                         " : Module param rx_frag_size must be 2048/4096/8192."
5790                         " Using 2048\n");
5791                 rx_frag_size = 2048;
5792         }
5793
5794         if (num_vfs > 0) {
5795                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
5796                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
5797         }
5798
5799         return pci_register_driver(&be_driver);
5800 }
5801 module_init(be_init_module);
5802
5803 static void __exit be_exit_module(void)
5804 {
5805         pci_unregister_driver(&be_driver);
5806 }
5807 module_exit(be_exit_module);