1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
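/* VFs are enabled/disabled via the standard PCI SR-IOV sysfs interface,
 * e.g. (device path shown only for illustration):
 *   echo <num_vfs> > /sys/bus/pci/devices/<domain:bus:dev.fn>/sriov_numvfs
 */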
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 static const struct pci_device_id be_dev_ids[] = {
45         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
46         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
47         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
49         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
50         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
51         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
52         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
53         { 0 }
54 };
55 MODULE_DEVICE_TABLE(pci, be_dev_ids);
56 /* UE Status Low CSR */
57 static const char * const ue_status_low_desc[] = {
58         "CEV",
59         "CTX",
60         "DBUF",
61         "ERX",
62         "Host",
63         "MPU",
64         "NDMA",
65         "PTC ",
66         "RDMA ",
67         "RXF ",
68         "RXIPS ",
69         "RXULP0 ",
70         "RXULP1 ",
71         "RXULP2 ",
72         "TIM ",
73         "TPOST ",
74         "TPRE ",
75         "TXIPS ",
76         "TXULP0 ",
77         "TXULP1 ",
78         "UC ",
79         "WDMA ",
80         "TXULP2 ",
81         "HOST1 ",
82         "P0_OB_LINK ",
83         "P1_OB_LINK ",
84         "HOST_GPIO ",
85         "MBOX ",
86         "ERX2 ",
87         "SPARE ",
88         "JTAG ",
89         "MPU_INTPEND "
90 };
91
92 /* UE Status High CSR */
93 static const char * const ue_status_hi_desc[] = {
94         "LPCMEMHOST",
95         "MGMT_MAC",
96         "PCS0ONLINE",
97         "MPU_IRAM",
98         "PCS1ONLINE",
99         "PCTL0",
100         "PCTL1",
101         "PMEM",
102         "RR",
103         "TXPB",
104         "RXPP",
105         "XAUI",
106         "TXP",
107         "ARM",
108         "IPC",
109         "HOST2",
110         "HOST3",
111         "HOST4",
112         "HOST5",
113         "HOST6",
114         "HOST7",
115         "ECRC",
116         "Poison TLP",
117         "NETC",
118         "PERIPH",
119         "LLTXULP",
120         "D2P",
121         "RCON",
122         "LDMA",
123         "LLTXP",
124         "LLTXPB",
125         "Unknown"
126 };
127
128 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
129                                  BE_IF_FLAGS_BROADCAST | \
130                                  BE_IF_FLAGS_MULTICAST | \
131                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
132
133 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
134 {
135         struct be_dma_mem *mem = &q->dma_mem;
136
137         if (mem->va) {
138                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
139                                   mem->dma);
140                 mem->va = NULL;
141         }
142 }
143
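/* Allocate zeroed DMA-coherent memory for a ring of 'len' entries of
 * 'entry_size' bytes each; released by be_queue_free().
 */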
144 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
145                           u16 len, u16 entry_size)
146 {
147         struct be_dma_mem *mem = &q->dma_mem;
148
149         memset(q, 0, sizeof(*q));
150         q->len = len;
151         q->entry_size = entry_size;
152         mem->size = len * entry_size;
153         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
154                                       GFP_KERNEL);
155         if (!mem->va)
156                 return -ENOMEM;
157         return 0;
158 }
159
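/* Enable/disable host interrupts by toggling the HOSTINTR bit in the
 * MEMBAR interrupt-control register, accessed through PCI config space.
 */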
160 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
161 {
162         u32 reg, enabled;
163
164         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
165                               &reg);
166         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
167
168         if (!enabled && enable)
169                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
170         else if (enabled && !enable)
171                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
172         else
173                 return;
174
175         pci_write_config_dword(adapter->pdev,
176                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
177 }
178
179 static void be_intr_set(struct be_adapter *adapter, bool enable)
180 {
181         int status = 0;
182
183         /* On Lancer, interrupts can't be controlled via this register */
184         if (lancer_chip(adapter))
185                 return;
186
187         if (be_check_error(adapter, BE_ERROR_EEH))
188                 return;
189
190         status = be_cmd_intr_set(adapter, enable);
191         if (status)
192                 be_reg_intr_set(adapter, enable);
193 }
194
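/* The notify routines below ring doorbell registers: be_rxq_notify() posts
 * RX buffers, be_txq_notify() posts TX WRBs, and be_eq_notify()/be_cq_notify()
 * re-arm event/completion queues and acknowledge popped entries.
 */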
195 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
196 {
197         u32 val = 0;
198
199         if (be_check_error(adapter, BE_ERROR_HW))
200                 return;
201
202         val |= qid & DB_RQ_RING_ID_MASK;
203         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
204
205         wmb();
206         iowrite32(val, adapter->db + DB_RQ_OFFSET);
207 }
208
209 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
210                           u16 posted)
211 {
212         u32 val = 0;
213
214         if (be_check_error(adapter, BE_ERROR_HW))
215                 return;
216
217         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
218         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
219
220         wmb();
221         iowrite32(val, adapter->db + txo->db_offset);
222 }
223
224 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
225                          bool arm, bool clear_int, u16 num_popped,
226                          u32 eq_delay_mult_enc)
227 {
228         u32 val = 0;
229
230         val |= qid & DB_EQ_RING_ID_MASK;
231         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
232
233         if (be_check_error(adapter, BE_ERROR_HW))
234                 return;
235
236         if (arm)
237                 val |= 1 << DB_EQ_REARM_SHIFT;
238         if (clear_int)
239                 val |= 1 << DB_EQ_CLR_SHIFT;
240         val |= 1 << DB_EQ_EVNT_SHIFT;
241         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
242         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
243         iowrite32(val, adapter->db + DB_EQ_OFFSET);
244 }
245
246 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
247 {
248         u32 val = 0;
249
250         val |= qid & DB_CQ_RING_ID_MASK;
251         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
252                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
253
254         if (be_check_error(adapter, BE_ERROR_HW))
255                 return;
256
257         if (arm)
258                 val |= 1 << DB_CQ_REARM_SHIFT;
259         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
260         iowrite32(val, adapter->db + DB_CQ_OFFSET);
261 }
262
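/* Change the interface MAC address: program the new MAC with PMAC_ADD,
 * delete the old entry, and read back the active MAC from the FW to confirm
 * the change before updating netdev->dev_addr.
 */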
263 static int be_mac_addr_set(struct net_device *netdev, void *p)
264 {
265         struct be_adapter *adapter = netdev_priv(netdev);
266         struct device *dev = &adapter->pdev->dev;
267         struct sockaddr *addr = p;
268         int status;
269         u8 mac[ETH_ALEN];
270         u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
271
272         if (!is_valid_ether_addr(addr->sa_data))
273                 return -EADDRNOTAVAIL;
274
275         /* Proceed further only if the user-provided MAC is different
276          * from the active MAC
277          */
278         if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
279                 return 0;
280
281         /* if device is not running, copy MAC to netdev->dev_addr */
282         if (!netif_running(netdev))
283                 goto done;
284
285         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
286          * privilege or if PF did not provision the new MAC address.
287          * On BE3, this cmd will always fail if the VF doesn't have the
288          * FILTMGMT privilege. This failure is OK only if the PF programmed
289          * the MAC for the VF.
290          */
291         status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
292                                  adapter->if_handle, &adapter->pmac_id[0], 0);
293         if (!status) {
294                 curr_pmac_id = adapter->pmac_id[0];
295
296                 /* Delete the old programmed MAC. This call may fail if the
297                  * old MAC was already deleted by the PF driver.
298                  */
299                 if (adapter->pmac_id[0] != old_pmac_id)
300                         be_cmd_pmac_del(adapter, adapter->if_handle,
301                                         old_pmac_id, 0);
302         }
303
304         /* Decide if the new MAC is successfully activated only after
305          * querying the FW
306          */
307         status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
308                                        adapter->if_handle, true, 0);
309         if (status)
310                 goto err;
311
312         /* The MAC change did not happen, either due to lack of privilege
313          * or because the PF didn't pre-provision the MAC.
314          */
315         if (!ether_addr_equal(addr->sa_data, mac)) {
316                 status = -EPERM;
317                 goto err;
318         }
319 done:
320         ether_addr_copy(netdev->dev_addr, addr->sa_data);
321         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
322         return 0;
323 err:
324         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
325         return status;
326 }
327
328 /* BE2 supports only v0 cmd */
329 static void *hw_stats_from_cmd(struct be_adapter *adapter)
330 {
331         if (BE2_chip(adapter)) {
332                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
333
334                 return &cmd->hw_stats;
335         } else if (BE3_chip(adapter)) {
336                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
337
338                 return &cmd->hw_stats;
339         } else {
340                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
341
342                 return &cmd->hw_stats;
343         }
344 }
345
346 /* BE2 supports only v0 cmd */
347 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
348 {
349         if (BE2_chip(adapter)) {
350                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
351
352                 return &hw_stats->erx;
353         } else if (BE3_chip(adapter)) {
354                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
355
356                 return &hw_stats->erx;
357         } else {
358                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
359
360                 return &hw_stats->erx;
361         }
362 }
363
364 static void populate_be_v0_stats(struct be_adapter *adapter)
365 {
366         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
367         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
368         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
369         struct be_port_rxf_stats_v0 *port_stats =
370                                         &rxf_stats->port[adapter->port_num];
371         struct be_drv_stats *drvs = &adapter->drv_stats;
372
373         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
374         drvs->rx_pause_frames = port_stats->rx_pause_frames;
375         drvs->rx_crc_errors = port_stats->rx_crc_errors;
376         drvs->rx_control_frames = port_stats->rx_control_frames;
377         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
378         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
379         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
380         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
381         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
382         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
383         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
384         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
385         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
386         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
387         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
388         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
389         drvs->rx_dropped_header_too_small =
390                 port_stats->rx_dropped_header_too_small;
391         drvs->rx_address_filtered =
392                                         port_stats->rx_address_filtered +
393                                         port_stats->rx_vlan_filtered;
394         drvs->rx_alignment_symbol_errors =
395                 port_stats->rx_alignment_symbol_errors;
396
397         drvs->tx_pauseframes = port_stats->tx_pauseframes;
398         drvs->tx_controlframes = port_stats->tx_controlframes;
399
400         if (adapter->port_num)
401                 drvs->jabber_events = rxf_stats->port1_jabber_events;
402         else
403                 drvs->jabber_events = rxf_stats->port0_jabber_events;
404         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
405         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
406         drvs->forwarded_packets = rxf_stats->forwarded_packets;
407         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
408         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
409         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
410         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
411 }
412
413 static void populate_be_v1_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v1 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
424         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
425         drvs->rx_pause_frames = port_stats->rx_pause_frames;
426         drvs->rx_crc_errors = port_stats->rx_crc_errors;
427         drvs->rx_control_frames = port_stats->rx_control_frames;
428         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
429         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
430         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
431         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
432         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
433         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
434         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
435         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
436         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
437         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_input_fifo_overflow_drop =
441                 port_stats->rx_input_fifo_overflow_drop;
442         drvs->rx_address_filtered = port_stats->rx_address_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
449         drvs->jabber_events = port_stats->jabber_events;
450         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
451         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
452         drvs->forwarded_packets = rxf_stats->forwarded_packets;
453         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
454         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
455         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
456         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
457 }
458
459 static void populate_be_v2_stats(struct be_adapter *adapter)
460 {
461         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
462         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
463         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
464         struct be_port_rxf_stats_v2 *port_stats =
465                                         &rxf_stats->port[adapter->port_num];
466         struct be_drv_stats *drvs = &adapter->drv_stats;
467
468         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
469         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
470         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
471         drvs->rx_pause_frames = port_stats->rx_pause_frames;
472         drvs->rx_crc_errors = port_stats->rx_crc_errors;
473         drvs->rx_control_frames = port_stats->rx_control_frames;
474         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
475         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
476         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
477         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
478         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
479         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
480         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
481         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
482         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
483         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
484         drvs->rx_dropped_header_too_small =
485                 port_stats->rx_dropped_header_too_small;
486         drvs->rx_input_fifo_overflow_drop =
487                 port_stats->rx_input_fifo_overflow_drop;
488         drvs->rx_address_filtered = port_stats->rx_address_filtered;
489         drvs->rx_alignment_symbol_errors =
490                 port_stats->rx_alignment_symbol_errors;
491         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
492         drvs->tx_pauseframes = port_stats->tx_pauseframes;
493         drvs->tx_controlframes = port_stats->tx_controlframes;
494         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
495         drvs->jabber_events = port_stats->jabber_events;
496         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
497         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
498         drvs->forwarded_packets = rxf_stats->forwarded_packets;
499         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
500         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
501         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
502         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
503         if (be_roce_supported(adapter)) {
504                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
505                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
506                 drvs->rx_roce_frames = port_stats->roce_frames_received;
507                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
508                 drvs->roce_drops_payload_len =
509                         port_stats->roce_drops_payload_len;
510         }
511 }
512
513 static void populate_lancer_stats(struct be_adapter *adapter)
514 {
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
517
518         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
519         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
520         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
521         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
522         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
523         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
524         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
525         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
526         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
527         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
528         drvs->rx_dropped_tcp_length =
529                                 pport_stats->rx_dropped_invalid_tcp_length;
530         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
533         drvs->rx_dropped_header_too_small =
534                                 pport_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
536         drvs->rx_address_filtered =
537                                         pport_stats->rx_address_filtered +
538                                         pport_stats->rx_vlan_filtered;
539         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
540         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
541         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
542         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
543         drvs->jabber_events = pport_stats->rx_jabbers;
544         drvs->forwarded_packets = pport_stats->num_forwards_lo;
545         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
546         drvs->rx_drops_too_many_frags =
547                                 pport_stats->rx_drops_too_many_frags_lo;
548 }
549
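/* Fold a 16-bit HW counter into a 32-bit accumulator, accounting for at
 * most one wrap-around (at 65536) since the previous sample.
 */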
550 static void accumulate_16bit_val(u32 *acc, u16 val)
551 {
552 #define lo(x)                   (x & 0xFFFF)
553 #define hi(x)                   (x & 0xFFFF0000)
554         bool wrapped = val < lo(*acc);
555         u32 newacc = hi(*acc) + val;
556
557         if (wrapped)
558                 newacc += 65536;
559         ACCESS_ONCE(*acc) = newacc;
560 }
561
562 static void populate_erx_stats(struct be_adapter *adapter,
563                                struct be_rx_obj *rxo, u32 erx_stat)
564 {
565         if (!BEx_chip(adapter))
566                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
567         else
568                 /* the erx HW counter below can wrap around after
569                  * 65535; the driver accumulates it into a 32-bit value
570                  */
571                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
572                                      (u16)erx_stat);
573 }
574
575 void be_parse_stats(struct be_adapter *adapter)
576 {
577         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
578         struct be_rx_obj *rxo;
579         int i;
580         u32 erx_stat;
581
582         if (lancer_chip(adapter)) {
583                 populate_lancer_stats(adapter);
584         } else {
585                 if (BE2_chip(adapter))
586                         populate_be_v0_stats(adapter);
587                 else if (BE3_chip(adapter))
588                         /* for BE3 */
589                         populate_be_v1_stats(adapter);
590                 else
591                         populate_be_v2_stats(adapter);
592
593                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
594                 for_all_rx_queues(adapter, rxo, i) {
595                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
596                         populate_erx_stats(adapter, rxo, erx_stat);
597                 }
598         }
599 }
600
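/* Aggregate per-queue SW counters (read under u64_stats sync) and the
 * FW-reported error counters in drv_stats into rtnl_link_stats64.
 */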
601 static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev,
602                                                 struct rtnl_link_stats64 *stats)
603 {
604         struct be_adapter *adapter = netdev_priv(netdev);
605         struct be_drv_stats *drvs = &adapter->drv_stats;
606         struct be_rx_obj *rxo;
607         struct be_tx_obj *txo;
608         u64 pkts, bytes;
609         unsigned int start;
610         int i;
611
612         for_all_rx_queues(adapter, rxo, i) {
613                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
614
615                 do {
616                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
617                         pkts = rx_stats(rxo)->rx_pkts;
618                         bytes = rx_stats(rxo)->rx_bytes;
619                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
620                 stats->rx_packets += pkts;
621                 stats->rx_bytes += bytes;
622                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
623                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
624                                         rx_stats(rxo)->rx_drops_no_frags;
625         }
626
627         for_all_tx_queues(adapter, txo, i) {
628                 const struct be_tx_stats *tx_stats = tx_stats(txo);
629
630                 do {
631                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
632                         pkts = tx_stats(txo)->tx_pkts;
633                         bytes = tx_stats(txo)->tx_bytes;
634                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
635                 stats->tx_packets += pkts;
636                 stats->tx_bytes += bytes;
637         }
638
639         /* bad pkts received */
640         stats->rx_errors = drvs->rx_crc_errors +
641                 drvs->rx_alignment_symbol_errors +
642                 drvs->rx_in_range_errors +
643                 drvs->rx_out_range_errors +
644                 drvs->rx_frame_too_long +
645                 drvs->rx_dropped_too_small +
646                 drvs->rx_dropped_too_short +
647                 drvs->rx_dropped_header_too_small +
648                 drvs->rx_dropped_tcp_length +
649                 drvs->rx_dropped_runt;
650
651         /* detailed rx errors */
652         stats->rx_length_errors = drvs->rx_in_range_errors +
653                 drvs->rx_out_range_errors +
654                 drvs->rx_frame_too_long;
655
656         stats->rx_crc_errors = drvs->rx_crc_errors;
657
658         /* frame alignment errors */
659         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
660
661         /* receiver fifo overrun */
662         /* drops_no_pbuf is not per i/f, it's per BE card */
663         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
664                                 drvs->rx_input_fifo_overflow_drop +
665                                 drvs->rx_drops_no_pbuf;
666         return stats;
667 }
668
669 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
670 {
671         struct net_device *netdev = adapter->netdev;
672
673         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
674                 netif_carrier_off(netdev);
675                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
676         }
677
678         if (link_status)
679                 netif_carrier_on(netdev);
680         else
681                 netif_carrier_off(netdev);
682
683         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
684 }
685
686 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
687 {
688         struct be_tx_stats *stats = tx_stats(txo);
689         u64 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
690
691         u64_stats_update_begin(&stats->sync);
692         stats->tx_reqs++;
693         stats->tx_bytes += skb->len;
694         stats->tx_pkts += tx_pkts;
695         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
696                 stats->tx_vxlan_offload_pkts += tx_pkts;
697         u64_stats_update_end(&stats->sync);
698 }
699
700 /* Returns number of WRBs needed for the skb */
701 static u32 skb_wrb_cnt(struct sk_buff *skb)
702 {
703         /* +1 for the header wrb */
704         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
705 }
706
707 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
708 {
709         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
710         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
711         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
712         wrb->rsvd0 = 0;
713 }
714
715 /* A dummy wrb is just all zeros. A separate routine is used for the dummy
716  * wrb to avoid the swap and shift/mask operations in wrb_fill().
717  */
718 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
719 {
720         wrb->frag_pa_hi = 0;
721         wrb->frag_pa_lo = 0;
722         wrb->frag_len = 0;
723         wrb->rsvd0 = 0;
724 }
725
726 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
727                                      struct sk_buff *skb)
728 {
729         u8 vlan_prio;
730         u16 vlan_tag;
731
732         vlan_tag = skb_vlan_tag_get(skb);
733         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
734         /* If vlan priority provided by OS is NOT in available bmap */
735         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
736                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
737                                 adapter->recommended_prio_bits;
738
739         return vlan_tag;
740 }
741
742 /* Used only for IP tunnel packets */
743 static u16 skb_inner_ip_proto(struct sk_buff *skb)
744 {
745         return (inner_ip_hdr(skb)->version == 4) ?
746                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
747 }
748
749 static u16 skb_ip_proto(struct sk_buff *skb)
750 {
751         return (ip_hdr(skb)->version == 4) ?
752                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
753 }
754
755 static inline bool be_is_txq_full(struct be_tx_obj *txo)
756 {
757         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
758 }
759
760 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
761 {
762         return atomic_read(&txo->q.used) < txo->q.len / 2;
763 }
764
765 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
766 {
767         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
768 }
769
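/* Derive the TX WRB feature flags (LSO/LSO6, IP/TCP/UDP checksum offload,
 * VLAN tag) from the skb's offload state.
 */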
770 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
771                                        struct sk_buff *skb,
772                                        struct be_wrb_params *wrb_params)
773 {
774         u16 proto;
775
776         if (skb_is_gso(skb)) {
777                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
778                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
779                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
780                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
781         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
782                 if (skb->encapsulation) {
783                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
784                         proto = skb_inner_ip_proto(skb);
785                 } else {
786                         proto = skb_ip_proto(skb);
787                 }
788                 if (proto == IPPROTO_TCP)
789                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
790                 else if (proto == IPPROTO_UDP)
791                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
792         }
793
794         if (skb_vlan_tag_present(skb)) {
795                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
796                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
797         }
798
799         BE_WRB_F_SET(wrb_params->features, CRC, 1);
800 }
801
802 static void wrb_fill_hdr(struct be_adapter *adapter,
803                          struct be_eth_hdr_wrb *hdr,
804                          struct be_wrb_params *wrb_params,
805                          struct sk_buff *skb)
806 {
807         memset(hdr, 0, sizeof(*hdr));
808
809         SET_TX_WRB_HDR_BITS(crc, hdr,
810                             BE_WRB_F_GET(wrb_params->features, CRC));
811         SET_TX_WRB_HDR_BITS(ipcs, hdr,
812                             BE_WRB_F_GET(wrb_params->features, IPCS));
813         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
814                             BE_WRB_F_GET(wrb_params->features, TCPCS));
815         SET_TX_WRB_HDR_BITS(udpcs, hdr,
816                             BE_WRB_F_GET(wrb_params->features, UDPCS));
817
818         SET_TX_WRB_HDR_BITS(lso, hdr,
819                             BE_WRB_F_GET(wrb_params->features, LSO));
820         SET_TX_WRB_HDR_BITS(lso6, hdr,
821                             BE_WRB_F_GET(wrb_params->features, LSO6));
822         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
823
824         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
825          * hack is not needed, the evt bit is set while ringing DB.
826          */
827         SET_TX_WRB_HDR_BITS(event, hdr,
828                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
829         SET_TX_WRB_HDR_BITS(vlan, hdr,
830                             BE_WRB_F_GET(wrb_params->features, VLAN));
831         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
832
833         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
834         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
835         SET_TX_WRB_HDR_BITS(mgmt, hdr,
836                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
837 }
838
839 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
840                           bool unmap_single)
841 {
842         dma_addr_t dma;
843         u32 frag_len = le32_to_cpu(wrb->frag_len);
844
845
846         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
847                 (u64)le32_to_cpu(wrb->frag_pa_lo);
848         if (frag_len) {
849                 if (unmap_single)
850                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
851                 else
852                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
853         }
854 }
855
856 /* Grab a WRB header for xmit */
857 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
858 {
859         u32 head = txo->q.head;
860
861         queue_head_inc(&txo->q);
862         return head;
863 }
864
865 /* Set up the WRB header for xmit */
866 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
867                                 struct be_tx_obj *txo,
868                                 struct be_wrb_params *wrb_params,
869                                 struct sk_buff *skb, u16 head)
870 {
871         u32 num_frags = skb_wrb_cnt(skb);
872         struct be_queue_info *txq = &txo->q;
873         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
874
875         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
876         be_dws_cpu_to_le(hdr, sizeof(*hdr));
877
878         BUG_ON(txo->sent_skb_list[head]);
879         txo->sent_skb_list[head] = skb;
880         txo->last_req_hdr = head;
881         atomic_add(num_frags, &txq->used);
882         txo->last_req_wrb_cnt = num_frags;
883         txo->pend_wrb_cnt += num_frags;
884 }
885
886 /* Setup a WRB fragment (buffer descriptor) for xmit */
887 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
888                                  int len)
889 {
890         struct be_eth_wrb *wrb;
891         struct be_queue_info *txq = &txo->q;
892
893         wrb = queue_head_node(txq);
894         wrb_fill(wrb, busaddr, len);
895         queue_head_inc(txq);
896 }
897
898 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
899  * was invoked. The producer index is restored to the previous packet and the
900  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
901  */
902 static void be_xmit_restore(struct be_adapter *adapter,
903                             struct be_tx_obj *txo, u32 head, bool map_single,
904                             u32 copied)
905 {
906         struct device *dev;
907         struct be_eth_wrb *wrb;
908         struct be_queue_info *txq = &txo->q;
909
910         dev = &adapter->pdev->dev;
911         txq->head = head;
912
913         /* skip the first wrb (hdr); it's not mapped */
914         queue_head_inc(txq);
915         while (copied) {
916                 wrb = queue_head_node(txq);
917                 unmap_tx_frag(dev, wrb, map_single);
918                 map_single = false;
919                 copied -= le32_to_cpu(wrb->frag_len);
920                 queue_head_inc(txq);
921         }
922
923         txq->head = head;
924 }
925
926 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
927  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
928  * of WRBs used up by the packet.
929  */
930 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
931                            struct sk_buff *skb,
932                            struct be_wrb_params *wrb_params)
933 {
934         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
935         struct device *dev = &adapter->pdev->dev;
936         struct be_queue_info *txq = &txo->q;
937         bool map_single = false;
938         u32 head = txq->head;
939         dma_addr_t busaddr;
940         int len;
941
942         head = be_tx_get_wrb_hdr(txo);
943
944         if (skb->len > skb->data_len) {
945                 len = skb_headlen(skb);
946
947                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
948                 if (dma_mapping_error(dev, busaddr))
949                         goto dma_err;
950                 map_single = true;
951                 be_tx_setup_wrb_frag(txo, busaddr, len);
952                 copied += len;
953         }
954
955         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
956                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
957                 len = skb_frag_size(frag);
958
959                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
960                 if (dma_mapping_error(dev, busaddr))
961                         goto dma_err;
962                 be_tx_setup_wrb_frag(txo, busaddr, len);
963                 copied += len;
964         }
965
966         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
967
968         be_tx_stats_update(txo, skb);
969         return wrb_cnt;
970
971 dma_err:
972         adapter->drv_stats.dma_map_errors++;
973         be_xmit_restore(adapter, txo, head, map_single, copied);
974         return 0;
975 }
976
977 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
978 {
979         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
980 }
981
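/* Insert the VLAN tag into the packet itself instead of relying on HW
 * tagging; an outer QnQ VLAN is also inserted when qnq_vid is configured.
 */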
982 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
983                                              struct sk_buff *skb,
984                                              struct be_wrb_params
985                                              *wrb_params)
986 {
987         u16 vlan_tag = 0;
988
989         skb = skb_share_check(skb, GFP_ATOMIC);
990         if (unlikely(!skb))
991                 return skb;
992
993         if (skb_vlan_tag_present(skb))
994                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
995
996         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
997                 if (!vlan_tag)
998                         vlan_tag = adapter->pvid;
999                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1000                  * to skip VLAN insertion
1001                  */
1002                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1003         }
1004
1005         if (vlan_tag) {
1006                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1007                                                 vlan_tag);
1008                 if (unlikely(!skb))
1009                         return skb;
1010                 skb->vlan_tci = 0;
1011         }
1012
1013         /* Insert the outer VLAN, if any */
1014         if (adapter->qnq_vid) {
1015                 vlan_tag = adapter->qnq_vid;
1016                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1017                                                 vlan_tag);
1018                 if (unlikely(!skb))
1019                         return skb;
1020                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1021         }
1022
1023         return skb;
1024 }
1025
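/* Returns true for IPv6 pkts whose next header is neither TCP nor UDP and
 * whose option header length byte is 0xff - the pattern that can trigger
 * the TX stall handled via be_ipv6_tx_stall_chk().
 */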
1026 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1027 {
1028         struct ethhdr *eh = (struct ethhdr *)skb->data;
1029         u16 offset = ETH_HLEN;
1030
1031         if (eh->h_proto == htons(ETH_P_IPV6)) {
1032                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1033
1034                 offset += sizeof(struct ipv6hdr);
1035                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1036                     ip6h->nexthdr != NEXTHDR_UDP) {
1037                         struct ipv6_opt_hdr *ehdr =
1038                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1039
1040                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1041                         if (ehdr->hdrlen == 0xff)
1042                                 return true;
1043                 }
1044         }
1045         return false;
1046 }
1047
1048 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1049 {
1050         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1051 }
1052
1053 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1054 {
1055         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1056 }
1057
1058 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1059                                                   struct sk_buff *skb,
1060                                                   struct be_wrb_params
1061                                                   *wrb_params)
1062 {
1063         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1064         unsigned int eth_hdr_len;
1065         struct iphdr *ip;
1066
1067         /* For padded packets, BE HW modifies the tot_len field in the IP header
1068          * incorrectly when the VLAN tag is inserted by HW.
1069          * For padded packets, Lancer computes incorrect checksum.
1070          */
1071         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1072                                                 VLAN_ETH_HLEN : ETH_HLEN;
1073         if (skb->len <= 60 &&
1074             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1075             is_ipv4_pkt(skb)) {
1076                 ip = (struct iphdr *)ip_hdr(skb);
1077                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1078         }
1079
1080         /* If vlan tag is already inlined in the packet, skip HW VLAN
1081          * tagging in pvid-tagging mode
1082          */
1083         if (be_pvid_tagging_enabled(adapter) &&
1084             veh->h_vlan_proto == htons(ETH_P_8021Q))
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086
1087         /* HW has a bug wherein it will calculate CSUM for VLAN
1088          * pkts even though CSUM offload is disabled.
1089          * Manually insert the VLAN in the pkt.
1090          */
1091         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1092             skb_vlan_tag_present(skb)) {
1093                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1094                 if (unlikely(!skb))
1095                         goto err;
1096         }
1097
1098         /* HW may lockup when VLAN HW tagging is requested on
1099          * certain ipv6 packets. Drop such pkts if the HW workaround to
1100          * skip HW tagging is not enabled by FW.
1101          */
1102         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1103                      (adapter->pvid || adapter->qnq_vid) &&
1104                      !qnq_async_evt_rcvd(adapter)))
1105                 goto tx_drop;
1106
1107         /* Manual VLAN tag insertion to prevent:
1108          * ASIC lockup when the ASIC inserts VLAN tag into
1109          * certain ipv6 packets. Insert VLAN tags in driver,
1110          * and set event, completion, vlan bits accordingly
1111          * in the Tx WRB.
1112          */
1113         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1114             be_vlan_tag_tx_chk(adapter, skb)) {
1115                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1116                 if (unlikely(!skb))
1117                         goto err;
1118         }
1119
1120         return skb;
1121 tx_drop:
1122         dev_kfree_skb_any(skb);
1123 err:
1124         return NULL;
1125 }
1126
1127 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1128                                            struct sk_buff *skb,
1129                                            struct be_wrb_params *wrb_params)
1130 {
1131         int err;
1132
1133         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1134          * packets that are 32 bytes or less may cause a transmit stall
1135          * on that port. The workaround is to pad such packets
1136          * (len <= 32 bytes) to a minimum length of 36 bytes.
1137          */
1138         if (skb->len <= 32) {
1139                 if (skb_put_padto(skb, 36))
1140                         return NULL;
1141         }
1142
1143         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1144                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1145                 if (!skb)
1146                         return NULL;
1147         }
1148
1149         /* The stack can send us skbs with length greater than
1150          * what the HW can handle. Trim the extra bytes.
1151          */
1152         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1153         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1154         WARN_ON(err);
1155
1156         return skb;
1157 }
1158
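/* Ring the TX doorbell for all pending WRBs. The last request is made
 * eventable, and on non-Lancer chips a dummy WRB is added when the pending
 * count is odd.
 */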
1159 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1160 {
1161         struct be_queue_info *txq = &txo->q;
1162         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1163
1164         /* Mark the last request eventable if it hasn't been marked already */
1165         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1166                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1167
1168         /* compose a dummy wrb if there are odd set of wrbs to notify */
1169         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1170                 wrb_fill_dummy(queue_head_node(txq));
1171                 queue_head_inc(txq);
1172                 atomic_inc(&txq->used);
1173                 txo->pend_wrb_cnt++;
1174                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1175                                            TX_HDR_WRB_NUM_SHIFT);
1176                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1177                                           TX_HDR_WRB_NUM_SHIFT);
1178         }
1179         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1180         txo->pend_wrb_cnt = 0;
1181 }
1182
1183 /* OS2BMC related */
1184
1185 #define DHCP_CLIENT_PORT        68
1186 #define DHCP_SERVER_PORT        67
1187 #define NET_BIOS_PORT1          137
1188 #define NET_BIOS_PORT2          138
1189 #define DHCPV6_RAS_PORT         547
1190
1191 #define is_mc_allowed_on_bmc(adapter, eh)       \
1192         (!is_multicast_filt_enabled(adapter) && \
1193          is_multicast_ether_addr(eh->h_dest) && \
1194          !is_broadcast_ether_addr(eh->h_dest))
1195
1196 #define is_bc_allowed_on_bmc(adapter, eh)       \
1197         (!is_broadcast_filt_enabled(adapter) && \
1198          is_broadcast_ether_addr(eh->h_dest))
1199
1200 #define is_arp_allowed_on_bmc(adapter, skb)     \
1201         (is_arp(skb) && is_arp_filt_enabled(adapter))
1202
1203 #define is_broadcast_packet(eh, adapter)        \
1204                 (is_multicast_ether_addr(eh->h_dest) && \
1205                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1206
1207 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1208
1209 #define is_arp_filt_enabled(adapter)    \
1210                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1211
1212 #define is_dhcp_client_filt_enabled(adapter)    \
1213                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1214
1215 #define is_dhcp_srvr_filt_enabled(adapter)      \
1216                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1217
1218 #define is_nbios_filt_enabled(adapter)  \
1219                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1220
1221 #define is_ipv6_na_filt_enabled(adapter)        \
1222                 (adapter->bmc_filt_mask &       \
1223                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1224
1225 #define is_ipv6_ra_filt_enabled(adapter)        \
1226                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1227
1228 #define is_ipv6_ras_filt_enabled(adapter)       \
1229                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1230
1231 #define is_broadcast_filt_enabled(adapter)      \
1232                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1233
1234 #define is_multicast_filt_enabled(adapter)      \
1235                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1236
1237 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1238                                struct sk_buff **skb)
1239 {
1240         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1241         bool os2bmc = false;
1242
1243         if (!be_is_os2bmc_enabled(adapter))
1244                 goto done;
1245
1246         if (!is_multicast_ether_addr(eh->h_dest))
1247                 goto done;
1248
1249         if (is_mc_allowed_on_bmc(adapter, eh) ||
1250             is_bc_allowed_on_bmc(adapter, eh) ||
1251             is_arp_allowed_on_bmc(adapter, (*skb))) {
1252                 os2bmc = true;
1253                 goto done;
1254         }
1255
1256         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1257                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1258                 u8 nexthdr = hdr->nexthdr;
1259
1260                 if (nexthdr == IPPROTO_ICMPV6) {
1261                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1262
1263                         switch (icmp6->icmp6_type) {
1264                         case NDISC_ROUTER_ADVERTISEMENT:
1265                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1266                                 goto done;
1267                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1268                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1269                                 goto done;
1270                         default:
1271                                 break;
1272                         }
1273                 }
1274         }
1275
1276         if (is_udp_pkt((*skb))) {
1277                 struct udphdr *udp = udp_hdr((*skb));
1278
1279                 switch (ntohs(udp->dest)) {
1280                 case DHCP_CLIENT_PORT:
1281                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1282                         goto done;
1283                 case DHCP_SERVER_PORT:
1284                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1285                         goto done;
1286                 case NET_BIOS_PORT1:
1287                 case NET_BIOS_PORT2:
1288                         os2bmc = is_nbios_filt_enabled(adapter);
1289                         goto done;
1290                 case DHCPV6_RAS_PORT:
1291                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1292                         goto done;
1293                 default:
1294                         break;
1295                 }
1296         }
1297 done:
1298         /* For packets over a vlan that are destined to the BMC,
1299          * the asic expects the vlan tag to be inline in the packet.
1300          */
1301         if (os2bmc)
1302                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1303
1304         return os2bmc;
1305 }
1306
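/* Main transmit entry point: apply chip-specific workarounds, enqueue the
 * skb's WRBs, optionally enqueue a second copy for the BMC (OS2BMC) with
 * the mgmt bit set, and ring the doorbell when flushing.
 */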
1307 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1308 {
1309         struct be_adapter *adapter = netdev_priv(netdev);
1310         u16 q_idx = skb_get_queue_mapping(skb);
1311         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1312         struct be_wrb_params wrb_params = { 0 };
1313         bool flush = !skb->xmit_more;
1314         u16 wrb_cnt;
1315
1316         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1317         if (unlikely(!skb))
1318                 goto drop;
1319
1320         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1321
1322         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1323         if (unlikely(!wrb_cnt)) {
1324                 dev_kfree_skb_any(skb);
1325                 goto drop;
1326         }
1327
1328         /* if os2bmc is enabled and if the pkt is destined to bmc,
1329          * enqueue the pkt a 2nd time with mgmt bit set.
1330          */
1331         if (be_send_pkt_to_bmc(adapter, &skb)) {
1332                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1333                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1334                 if (unlikely(!wrb_cnt))
1335                         goto drop;
1336                 else
1337                         skb_get(skb);
1338         }
1339
1340         if (be_is_txq_full(txo)) {
1341                 netif_stop_subqueue(netdev, q_idx);
1342                 tx_stats(txo)->tx_stops++;
1343         }
1344
1345         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1346                 be_xmit_flush(adapter, txo);
1347
1348         return NETDEV_TX_OK;
1349 drop:
1350         tx_stats(txo)->tx_drv_drops++;
1351         /* Flush the already enqueued tx requests */
1352         if (flush && txo->pend_wrb_cnt)
1353                 be_xmit_flush(adapter, txo);
1354
1355         return NETDEV_TX_OK;
1356 }
1357
1358 static int be_change_mtu(struct net_device *netdev, int new_mtu)
1359 {
1360         struct be_adapter *adapter = netdev_priv(netdev);
1361         struct device *dev = &adapter->pdev->dev;
1362
1363         if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
1364                 dev_info(dev, "MTU must be between %d and %d bytes\n",
1365                          BE_MIN_MTU, BE_MAX_MTU);
1366                 return -EINVAL;
1367         }
1368
1369         dev_info(dev, "MTU changed from %d to %d bytes\n",
1370                  netdev->mtu, new_mtu);
1371         netdev->mtu = new_mtu;
1372         return 0;
1373 }
1374
1375 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1376 {
1377         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1378                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1379 }
1380
1381 static int be_set_vlan_promisc(struct be_adapter *adapter)
1382 {
1383         struct device *dev = &adapter->pdev->dev;
1384         int status;
1385
1386         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1387                 return 0;
1388
1389         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1390         if (!status) {
1391                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1392                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1393         } else {
1394                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1395         }
1396         return status;
1397 }
1398
1399 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1400 {
1401         struct device *dev = &adapter->pdev->dev;
1402         int status;
1403
1404         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1405         if (!status) {
1406                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1407                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1408         }
1409         return status;
1410 }
1411
1412 /*
1413  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1414  * If the user configures more, place BE in vlan promiscuous mode.
1415  */
1416 static int be_vid_config(struct be_adapter *adapter)
1417 {
1418         struct device *dev = &adapter->pdev->dev;
1419         u16 vids[BE_NUM_VLANS_SUPPORTED];
1420         u16 num = 0, i = 0;
1421         int status = 0;
1422
1423         /* No need to further configure vids if in promiscuous mode */
1424         if (be_in_all_promisc(adapter))
1425                 return 0;
1426
1427         if (adapter->vlans_added > be_max_vlans(adapter))
1428                 return be_set_vlan_promisc(adapter);
1429
1430         /* Construct VLAN Table to give to HW */
1431         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1432                 vids[num++] = cpu_to_le16(i);
1433
1434         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1435         if (status) {
1436                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1437                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1438                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1439                     addl_status(status) ==
1440                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1441                         return be_set_vlan_promisc(adapter);
1442         } else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1443                 status = be_clear_vlan_promisc(adapter);
1444         }
1445         return status;
1446 }
1447
1448 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1449 {
1450         struct be_adapter *adapter = netdev_priv(netdev);
1451         int status = 0;
1452
1453         /* Packets with VID 0 are always received by Lancer by default */
1454         if (lancer_chip(adapter) && vid == 0)
1455                 return status;
1456
1457         if (test_bit(vid, adapter->vids))
1458                 return status;
1459
1460         set_bit(vid, adapter->vids);
1461         adapter->vlans_added++;
1462
1463         status = be_vid_config(adapter);
1464         if (status) {
1465                 adapter->vlans_added--;
1466                 clear_bit(vid, adapter->vids);
1467         }
1468
1469         return status;
1470 }
1471
1472 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1473 {
1474         struct be_adapter *adapter = netdev_priv(netdev);
1475
1476         /* Packets with VID 0 are always received by Lancer by default */
1477         if (lancer_chip(adapter) && vid == 0)
1478                 return 0;
1479
1480         if (!test_bit(vid, adapter->vids))
1481                 return 0;
1482
1483         clear_bit(vid, adapter->vids);
1484         adapter->vlans_added--;
1485
1486         return be_vid_config(adapter);
1487 }
1488
1489 static void be_clear_all_promisc(struct be_adapter *adapter)
1490 {
1491         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF);
1492         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
1493 }
1494
1495 static void be_set_all_promisc(struct be_adapter *adapter)
1496 {
1497         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1498         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1499 }
1500
1501 static void be_set_mc_promisc(struct be_adapter *adapter)
1502 {
1503         int status;
1504
1505         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1506                 return;
1507
1508         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1509         if (!status)
1510                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1511 }
1512
1513 static void be_set_mc_list(struct be_adapter *adapter)
1514 {
1515         int status;
1516
1517         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1518         if (!status)
1519                 adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1520         else
1521                 be_set_mc_promisc(adapter);
1522 }
1523
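/* Re-program the unicast MAC filter list from the netdev UC address list.
 * Existing entries (beyond the primary MAC in slot 0) are deleted first; if
 * more addresses are requested than the HW supports, the interface is put
 * into promiscuous mode instead.
 */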
1524 static void be_set_uc_list(struct be_adapter *adapter)
1525 {
1526         struct netdev_hw_addr *ha;
1527         int i = 1; /* First slot is claimed by the Primary MAC */
1528
1529         for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
1530                 be_cmd_pmac_del(adapter, adapter->if_handle,
1531                                 adapter->pmac_id[i], 0);
1532
1533         if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) {
1534                 be_set_all_promisc(adapter);
1535                 return;
1536         }
1537
1538         netdev_for_each_uc_addr(ha, adapter->netdev) {
1539                 adapter->uc_macs++; /* First slot is for Primary MAC */
1540                 be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle,
1541                                 &adapter->pmac_id[adapter->uc_macs], 0);
1542         }
1543 }
1544
1545 static void be_clear_uc_list(struct be_adapter *adapter)
1546 {
1547         int i;
1548
1549         for (i = 1; i < (adapter->uc_macs + 1); i++)
1550                 be_cmd_pmac_del(adapter, adapter->if_handle,
1551                                 adapter->pmac_id[i], 0);
1552         adapter->uc_macs = 0;
1553 }
1554
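/* ndo_set_rx_mode handler: programs promiscuous, multicast and unicast
 * filtering on the interface based on the current netdev flags and
 * address lists.
 */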
1555 static void be_set_rx_mode(struct net_device *netdev)
1556 {
1557         struct be_adapter *adapter = netdev_priv(netdev);
1558
1559         if (netdev->flags & IFF_PROMISC) {
1560                 be_set_all_promisc(adapter);
1561                 return;
1562         }
1563
1564         /* Interface was previously in promiscuous mode; disable it */
1565         if (be_in_all_promisc(adapter)) {
1566                 be_clear_all_promisc(adapter);
1567                 if (adapter->vlans_added)
1568                         be_vid_config(adapter);
1569         }
1570
1571         /* Enable multicast promisc if num configured exceeds what we support */
1572         if (netdev->flags & IFF_ALLMULTI ||
1573             netdev_mc_count(netdev) > be_max_mc(adapter)) {
1574                 be_set_mc_promisc(adapter);
1575                 return;
1576         }
1577
1578         if (netdev_uc_count(netdev) != adapter->uc_macs)
1579                 be_set_uc_list(adapter);
1580
1581         be_set_mc_list(adapter);
1582 }
1583
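/* ndo_set_vf_mac handler: programs a new MAC address for the given VF.
 * On BEx chips the old pmac entry is deleted and a new one added; on newer
 * chips the MAC is set directly via be_cmd_set_mac().
 */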
1584 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1585 {
1586         struct be_adapter *adapter = netdev_priv(netdev);
1587         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1588         int status;
1589
1590         if (!sriov_enabled(adapter))
1591                 return -EPERM;
1592
1593         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1594                 return -EINVAL;
1595
1596         /* Proceed further only if user provided MAC is different
1597          * from active MAC
1598          */
1599         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1600                 return 0;
1601
1602         if (BEx_chip(adapter)) {
1603                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1604                                 vf + 1);
1605
1606                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1607                                          &vf_cfg->pmac_id, vf + 1);
1608         } else {
1609                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1610                                         vf + 1);
1611         }
1612
1613         if (status) {
1614                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1615                         mac, vf, status);
1616                 return be_cmd_status(status);
1617         }
1618
1619         ether_addr_copy(vf_cfg->mac_addr, mac);
1620
1621         return 0;
1622 }
1623
1624 static int be_get_vf_config(struct net_device *netdev, int vf,
1625                             struct ifla_vf_info *vi)
1626 {
1627         struct be_adapter *adapter = netdev_priv(netdev);
1628         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1629
1630         if (!sriov_enabled(adapter))
1631                 return -EPERM;
1632
1633         if (vf >= adapter->num_vfs)
1634                 return -EINVAL;
1635
1636         vi->vf = vf;
1637         vi->max_tx_rate = vf_cfg->tx_rate;
1638         vi->min_tx_rate = 0;
1639         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1640         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1641         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1642         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1643         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1644
1645         return 0;
1646 }
1647
1648 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1649 {
1650         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1651         u16 vids[BE_NUM_VLANS_SUPPORTED];
1652         int vf_if_id = vf_cfg->if_handle;
1653         int status;
1654
1655         /* Enable Transparent VLAN Tagging */
1656         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1657         if (status)
1658                 return status;
1659
1660         /* Once TVT is enabled, clear any pre-programmed VLAN filters on the VF */
1661         vids[0] = 0;
1662         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1663         if (!status)
1664                 dev_info(&adapter->pdev->dev,
1665                          "Cleared guest VLANs on VF%d", vf);
1666
1667         /* After TVT is enabled, disallow VFs to program VLAN filters */
1668         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1669                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1670                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1671                 if (!status)
1672                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1673         }
1674         return 0;
1675 }
1676
1677 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1678 {
1679         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1680         struct device *dev = &adapter->pdev->dev;
1681         int status;
1682
1683         /* Reset Transparent VLAN Tagging. */
1684         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1685                                        vf_cfg->if_handle, 0, 0);
1686         if (status)
1687                 return status;
1688
1689         /* Allow VFs to program VLAN filtering */
1690         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1691                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1692                                                   BE_PRIV_FILTMGMT, vf + 1);
1693                 if (!status) {
1694                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1695                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1696                 }
1697         }
1698
1699         dev_info(dev,
1700                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1701         return 0;
1702 }
1703
1704 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
1705 {
1706         struct be_adapter *adapter = netdev_priv(netdev);
1707         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1708         int status;
1709
1710         if (!sriov_enabled(adapter))
1711                 return -EPERM;
1712
1713         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1714                 return -EINVAL;
1715
1716         if (vlan || qos) {
1717                 vlan |= qos << VLAN_PRIO_SHIFT;
1718                 status = be_set_vf_tvt(adapter, vf, vlan);
1719         } else {
1720                 status = be_clear_vf_tvt(adapter, vf);
1721         }
1722
1723         if (status) {
1724                 dev_err(&adapter->pdev->dev,
1725                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1726                         status);
1727                 return be_cmd_status(status);
1728         }
1729
1730         vf_cfg->vlan_tag = vlan;
1731         return 0;
1732 }
1733
1734 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1735                              int min_tx_rate, int max_tx_rate)
1736 {
1737         struct be_adapter *adapter = netdev_priv(netdev);
1738         struct device *dev = &adapter->pdev->dev;
1739         int percent_rate, status = 0;
1740         u16 link_speed = 0;
1741         u8 link_status;
1742
1743         if (!sriov_enabled(adapter))
1744                 return -EPERM;
1745
1746         if (vf >= adapter->num_vfs)
1747                 return -EINVAL;
1748
1749         if (min_tx_rate)
1750                 return -EINVAL;
1751
1752         if (!max_tx_rate)
1753                 goto config_qos;
1754
1755         status = be_cmd_link_status_query(adapter, &link_speed,
1756                                           &link_status, 0);
1757         if (status)
1758                 goto err;
1759
1760         if (!link_status) {
1761                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1762                 status = -ENETDOWN;
1763                 goto err;
1764         }
1765
1766         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1767                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1768                         link_speed);
1769                 status = -EINVAL;
1770                 goto err;
1771         }
1772
1773         /* On Skyhawk the QOS setting must be done only as a % value */
1774         percent_rate = link_speed / 100;
1775         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1776                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1777                         percent_rate);
1778                 status = -EINVAL;
1779                 goto err;
1780         }
1781
1782 config_qos:
1783         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1784         if (status)
1785                 goto err;
1786
1787         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1788         return 0;
1789
1790 err:
1791         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1792                 max_tx_rate, vf);
1793         return be_cmd_status(status);
1794 }
1795
1796 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1797                                 int link_state)
1798 {
1799         struct be_adapter *adapter = netdev_priv(netdev);
1800         int status;
1801
1802         if (!sriov_enabled(adapter))
1803                 return -EPERM;
1804
1805         if (vf >= adapter->num_vfs)
1806                 return -EINVAL;
1807
1808         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
1809         if (status) {
1810                 dev_err(&adapter->pdev->dev,
1811                         "Link state change on VF %d failed: %#x\n", vf, status);
1812                 return be_cmd_status(status);
1813         }
1814
1815         adapter->vf_cfg[vf].plink_tracking = link_state;
1816
1817         return 0;
1818 }
1819
1820 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
1821 {
1822         struct be_adapter *adapter = netdev_priv(netdev);
1823         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1824         u8 spoofchk;
1825         int status;
1826
1827         if (!sriov_enabled(adapter))
1828                 return -EPERM;
1829
1830         if (vf >= adapter->num_vfs)
1831                 return -EINVAL;
1832
1833         if (BEx_chip(adapter))
1834                 return -EOPNOTSUPP;
1835
1836         if (enable == vf_cfg->spoofchk)
1837                 return 0;
1838
1839         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
1840
1841         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
1842                                        0, spoofchk);
1843         if (status) {
1844                 dev_err(&adapter->pdev->dev,
1845                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
1846                 return be_cmd_status(status);
1847         }
1848
1849         vf_cfg->spoofchk = enable;
1850         return 0;
1851 }
1852
1853 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
1854                           ulong now)
1855 {
1856         aic->rx_pkts_prev = rx_pkts;
1857         aic->tx_reqs_prev = tx_pkts;
1858         aic->jiffies = now;
1859 }
1860
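/* Adaptive interrupt coalescing: compute a new EQ delay for this EQ from the
 * combined RX/TX packet rate observed since the last update, clamped to the
 * [min_eqd, max_eqd] range. Returns the static et_eqd value when adaptive
 * mode is disabled.
 */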
1861 static int be_get_new_eqd(struct be_eq_obj *eqo)
1862 {
1863         struct be_adapter *adapter = eqo->adapter;
1864         int eqd, start;
1865         struct be_aic_obj *aic;
1866         struct be_rx_obj *rxo;
1867         struct be_tx_obj *txo;
1868         u64 rx_pkts = 0, tx_pkts = 0;
1869         ulong now;
1870         u32 pps, delta;
1871         int i;
1872
1873         aic = &adapter->aic_obj[eqo->idx];
1874         if (!aic->enable) {
1875                 if (aic->jiffies)
1876                         aic->jiffies = 0;
1877                 eqd = aic->et_eqd;
1878                 return eqd;
1879         }
1880
1881         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
1882                 do {
1883                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
1884                         rx_pkts += rxo->stats.rx_pkts;
1885                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
1886         }
1887
1888         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
1889                 do {
1890                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
1891                         tx_pkts += txo->stats.tx_reqs;
1892                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
1893         }
1894
1895         /* Skip, if wrapped around or first calculation */
1896         now = jiffies;
1897         if (!aic->jiffies || time_before(now, aic->jiffies) ||
1898             rx_pkts < aic->rx_pkts_prev ||
1899             tx_pkts < aic->tx_reqs_prev) {
1900                 be_aic_update(aic, rx_pkts, tx_pkts, now);
1901                 return aic->prev_eqd;
1902         }
1903
1904         delta = jiffies_to_msecs(now - aic->jiffies);
1905         if (delta == 0)
1906                 return aic->prev_eqd;
1907
1908         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
1909                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
1910         eqd = (pps / 15000) << 2;
1911
1912         if (eqd < 8)
1913                 eqd = 0;
1914         eqd = min_t(u32, eqd, aic->max_eqd);
1915         eqd = max_t(u32, eqd, aic->min_eqd);
1916
1917         be_aic_update(aic, rx_pkts, tx_pkts, now);
1918
1919         return eqd;
1920 }
1921
1922 /* For Skyhawk-R only */
1923 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
1924 {
1925         struct be_adapter *adapter = eqo->adapter;
1926         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
1927         ulong now = jiffies;
1928         int eqd;
1929         u32 mult_enc;
1930
1931         if (!aic->enable)
1932                 return 0;
1933
1934         if (jiffies_to_msecs(now - aic->jiffies) < 1)
1935                 eqd = aic->prev_eqd;
1936         else
1937                 eqd = be_get_new_eqd(eqo);
1938
1939         if (eqd > 100)
1940                 mult_enc = R2I_DLY_ENC_1;
1941         else if (eqd > 60)
1942                 mult_enc = R2I_DLY_ENC_2;
1943         else if (eqd > 20)
1944                 mult_enc = R2I_DLY_ENC_3;
1945         else
1946                 mult_enc = R2I_DLY_ENC_0;
1947
1948         aic->prev_eqd = eqd;
1949
1950         return mult_enc;
1951 }
1952
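/* Re-evaluate the EQ delay for every event queue and call be_cmd_modify_eqd()
 * for the EQs whose delay has changed (or for all of them when force_update
 * is set).
 */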
1953 void be_eqd_update(struct be_adapter *adapter, bool force_update)
1954 {
1955         struct be_set_eqd set_eqd[MAX_EVT_QS];
1956         struct be_aic_obj *aic;
1957         struct be_eq_obj *eqo;
1958         int i, num = 0, eqd;
1959
1960         for_all_evt_queues(adapter, eqo, i) {
1961                 aic = &adapter->aic_obj[eqo->idx];
1962                 eqd = be_get_new_eqd(eqo);
1963                 if (force_update || eqd != aic->prev_eqd) {
1964                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
1965                         set_eqd[num].eq_id = eqo->q.id;
1966                         aic->prev_eqd = eqd;
1967                         num++;
1968                 }
1969         }
1970
1971         if (num)
1972                 be_cmd_modify_eqd(adapter, set_eqd, num);
1973 }
1974
1975 static void be_rx_stats_update(struct be_rx_obj *rxo,
1976                                struct be_rx_compl_info *rxcp)
1977 {
1978         struct be_rx_stats *stats = rx_stats(rxo);
1979
1980         u64_stats_update_begin(&stats->sync);
1981         stats->rx_compl++;
1982         stats->rx_bytes += rxcp->pkt_size;
1983         stats->rx_pkts++;
1984         if (rxcp->tunneled)
1985                 stats->rx_vxlan_offload_pkts++;
1986         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
1987                 stats->rx_mcast_pkts++;
1988         if (rxcp->err)
1989                 stats->rx_compl_err++;
1990         u64_stats_update_end(&stats->sync);
1991 }
1992
1993 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
1994 {
1995         /* L4 checksum is not reliable for non TCP/UDP packets.
1996          * Also ignore ipcksm for ipv6 pkts
1997          */
1998         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
1999                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2000 }
2001
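/* Pop the page_info for the fragment at the RXQ tail, unmapping (or syncing)
 * the DMA mapping of the backing page as needed.
 */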
2002 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2003 {
2004         struct be_adapter *adapter = rxo->adapter;
2005         struct be_rx_page_info *rx_page_info;
2006         struct be_queue_info *rxq = &rxo->q;
2007         u32 frag_idx = rxq->tail;
2008
2009         rx_page_info = &rxo->page_info_tbl[frag_idx];
2010         BUG_ON(!rx_page_info->page);
2011
2012         if (rx_page_info->last_frag) {
2013                 dma_unmap_page(&adapter->pdev->dev,
2014                                dma_unmap_addr(rx_page_info, bus),
2015                                adapter->big_page_size, DMA_FROM_DEVICE);
2016                 rx_page_info->last_frag = false;
2017         } else {
2018                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2019                                         dma_unmap_addr(rx_page_info, bus),
2020                                         rx_frag_size, DMA_FROM_DEVICE);
2021         }
2022
2023         queue_tail_inc(rxq);
2024         atomic_dec(&rxq->used);
2025         return rx_page_info;
2026 }
2027
2028 /* Throw away the data in the Rx completion */
2029 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2030                                 struct be_rx_compl_info *rxcp)
2031 {
2032         struct be_rx_page_info *page_info;
2033         u16 i, num_rcvd = rxcp->num_rcvd;
2034
2035         for (i = 0; i < num_rcvd; i++) {
2036                 page_info = get_rx_page_info(rxo);
2037                 put_page(page_info->page);
2038                 memset(page_info, 0, sizeof(*page_info));
2039         }
2040 }
2041
2042 /*
2043  * skb_fill_rx_data forms a complete skb for an ether frame
2044  * indicated by rxcp.
2045  */
2046 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2047                              struct be_rx_compl_info *rxcp)
2048 {
2049         struct be_rx_page_info *page_info;
2050         u16 i, j;
2051         u16 hdr_len, curr_frag_len, remaining;
2052         u8 *start;
2053
2054         page_info = get_rx_page_info(rxo);
2055         start = page_address(page_info->page) + page_info->page_offset;
2056         prefetch(start);
2057
2058         /* Copy data in the first descriptor of this completion */
2059         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2060
2061         skb->len = curr_frag_len;
2062         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2063                 memcpy(skb->data, start, curr_frag_len);
2064                 /* Complete packet has now been moved to data */
2065                 put_page(page_info->page);
2066                 skb->data_len = 0;
2067                 skb->tail += curr_frag_len;
2068         } else {
2069                 hdr_len = ETH_HLEN;
2070                 memcpy(skb->data, start, hdr_len);
2071                 skb_shinfo(skb)->nr_frags = 1;
2072                 skb_frag_set_page(skb, 0, page_info->page);
2073                 skb_shinfo(skb)->frags[0].page_offset =
2074                                         page_info->page_offset + hdr_len;
2075                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2076                                   curr_frag_len - hdr_len);
2077                 skb->data_len = curr_frag_len - hdr_len;
2078                 skb->truesize += rx_frag_size;
2079                 skb->tail += hdr_len;
2080         }
2081         page_info->page = NULL;
2082
2083         if (rxcp->pkt_size <= rx_frag_size) {
2084                 BUG_ON(rxcp->num_rcvd != 1);
2085                 return;
2086         }
2087
2088         /* More frags present for this completion */
2089         remaining = rxcp->pkt_size - curr_frag_len;
2090         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2091                 page_info = get_rx_page_info(rxo);
2092                 curr_frag_len = min(remaining, rx_frag_size);
2093
2094                 /* Coalesce all frags from the same physical page in one slot */
2095                 if (page_info->page_offset == 0) {
2096                         /* Fresh page */
2097                         j++;
2098                         skb_frag_set_page(skb, j, page_info->page);
2099                         skb_shinfo(skb)->frags[j].page_offset =
2100                                                         page_info->page_offset;
2101                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2102                         skb_shinfo(skb)->nr_frags++;
2103                 } else {
2104                         put_page(page_info->page);
2105                 }
2106
2107                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2108                 skb->len += curr_frag_len;
2109                 skb->data_len += curr_frag_len;
2110                 skb->truesize += rx_frag_size;
2111                 remaining -= curr_frag_len;
2112                 page_info->page = NULL;
2113         }
2114         BUG_ON(j > MAX_SKB_FRAGS);
2115 }
2116
2117 /* Process the RX completion indicated by rxcp when GRO is disabled */
2118 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2119                                 struct be_rx_compl_info *rxcp)
2120 {
2121         struct be_adapter *adapter = rxo->adapter;
2122         struct net_device *netdev = adapter->netdev;
2123         struct sk_buff *skb;
2124
2125         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2126         if (unlikely(!skb)) {
2127                 rx_stats(rxo)->rx_drops_no_skbs++;
2128                 be_rx_compl_discard(rxo, rxcp);
2129                 return;
2130         }
2131
2132         skb_fill_rx_data(rxo, skb, rxcp);
2133
2134         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2135                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2136         else
2137                 skb_checksum_none_assert(skb);
2138
2139         skb->protocol = eth_type_trans(skb, netdev);
2140         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2141         if (netdev->features & NETIF_F_RXHASH)
2142                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2143
2144         skb->csum_level = rxcp->tunneled;
2145         skb_mark_napi_id(skb, napi);
2146
2147         if (rxcp->vlanf)
2148                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2149
2150         netif_receive_skb(skb);
2151 }
2152
2153 /* Process the RX completion indicated by rxcp when GRO is enabled */
2154 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2155                                     struct napi_struct *napi,
2156                                     struct be_rx_compl_info *rxcp)
2157 {
2158         struct be_adapter *adapter = rxo->adapter;
2159         struct be_rx_page_info *page_info;
2160         struct sk_buff *skb = NULL;
2161         u16 remaining, curr_frag_len;
2162         u16 i, j;
2163
2164         skb = napi_get_frags(napi);
2165         if (!skb) {
2166                 be_rx_compl_discard(rxo, rxcp);
2167                 return;
2168         }
2169
2170         remaining = rxcp->pkt_size;
2171         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2172                 page_info = get_rx_page_info(rxo);
2173
2174                 curr_frag_len = min(remaining, rx_frag_size);
2175
2176                 /* Coalesce all frags from the same physical page in one slot */
2177                 if (i == 0 || page_info->page_offset == 0) {
2178                         /* First frag or Fresh page */
2179                         j++;
2180                         skb_frag_set_page(skb, j, page_info->page);
2181                         skb_shinfo(skb)->frags[j].page_offset =
2182                                                         page_info->page_offset;
2183                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2184                 } else {
2185                         put_page(page_info->page);
2186                 }
2187                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2188                 skb->truesize += rx_frag_size;
2189                 remaining -= curr_frag_len;
2190                 memset(page_info, 0, sizeof(*page_info));
2191         }
2192         BUG_ON(j > MAX_SKB_FRAGS);
2193
2194         skb_shinfo(skb)->nr_frags = j + 1;
2195         skb->len = rxcp->pkt_size;
2196         skb->data_len = rxcp->pkt_size;
2197         skb->ip_summed = CHECKSUM_UNNECESSARY;
2198         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2199         if (adapter->netdev->features & NETIF_F_RXHASH)
2200                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2201
2202         skb->csum_level = rxcp->tunneled;
2203
2204         if (rxcp->vlanf)
2205                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2206
2207         napi_gro_frags(napi);
2208 }
2209
2210 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2211                                  struct be_rx_compl_info *rxcp)
2212 {
2213         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2214         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2215         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2216         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2217         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2218         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2219         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2220         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2221         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2222         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2223         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2224         if (rxcp->vlanf) {
2225                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2226                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2227         }
2228         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2229         rxcp->tunneled =
2230                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2231 }
2232
2233 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2234                                  struct be_rx_compl_info *rxcp)
2235 {
2236         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2237         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2238         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2239         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2240         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2241         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2242         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2243         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2244         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2245         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2246         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2247         if (rxcp->vlanf) {
2248                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2249                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2250         }
2251         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2252         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2253 }
2254
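/* Return the next valid RX completion from the RX CQ (parsed into rxo->rxcp),
 * or NULL if none is pending. Handles both v0 and v1 completion formats and
 * the vlan-tag fixups.
 */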
2255 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2256 {
2257         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2258         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2259         struct be_adapter *adapter = rxo->adapter;
2260
2261         /* For checking the valid bit it is Ok to use either definition as the
2262          * valid bit is at the same position in both v0 and v1 Rx compl */
2263         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2264                 return NULL;
2265
2266         rmb();
2267         be_dws_le_to_cpu(compl, sizeof(*compl));
2268
2269         if (adapter->be3_native)
2270                 be_parse_rx_compl_v1(compl, rxcp);
2271         else
2272                 be_parse_rx_compl_v0(compl, rxcp);
2273
2274         if (rxcp->ip_frag)
2275                 rxcp->l4_csum = 0;
2276
2277         if (rxcp->vlanf) {
2278                 /* In QNQ modes, if qnq bit is not set, then the packet was
2279                  * tagged only with the transparent outer vlan-tag and must
2280                  * not be treated as a vlan packet by host
2281                  */
2282                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2283                         rxcp->vlanf = 0;
2284
2285                 if (!lancer_chip(adapter))
2286                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2287
2288                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2289                     !test_bit(rxcp->vlan_tag, adapter->vids))
2290                         rxcp->vlanf = 0;
2291         }
2292
2293         /* As the compl has been parsed, reset it; we won't touch it again */
2294         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2295
2296         queue_tail_inc(&rxo->cq);
2297         return rxcp;
2298 }
2299
2300 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2301 {
2302         u32 order = get_order(size);
2303
2304         if (order > 0)
2305                 gfp |= __GFP_COMP;
2306         return  alloc_pages(gfp, order);
2307 }
2308
2309 /*
2310  * Allocate a page, split it to fragments of size rx_frag_size and post as
2311  * receive buffers to BE
2312  */
2313 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2314 {
2315         struct be_adapter *adapter = rxo->adapter;
2316         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2317         struct be_queue_info *rxq = &rxo->q;
2318         struct page *pagep = NULL;
2319         struct device *dev = &adapter->pdev->dev;
2320         struct be_eth_rx_d *rxd;
2321         u64 page_dmaaddr = 0, frag_dmaaddr;
2322         u32 posted, page_offset = 0, notify = 0;
2323
2324         page_info = &rxo->page_info_tbl[rxq->head];
2325         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2326                 if (!pagep) {
2327                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2328                         if (unlikely(!pagep)) {
2329                                 rx_stats(rxo)->rx_post_fail++;
2330                                 break;
2331                         }
2332                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2333                                                     adapter->big_page_size,
2334                                                     DMA_FROM_DEVICE);
2335                         if (dma_mapping_error(dev, page_dmaaddr)) {
2336                                 put_page(pagep);
2337                                 pagep = NULL;
2338                                 adapter->drv_stats.dma_map_errors++;
2339                                 break;
2340                         }
2341                         page_offset = 0;
2342                 } else {
2343                         get_page(pagep);
2344                         page_offset += rx_frag_size;
2345                 }
2346                 page_info->page_offset = page_offset;
2347                 page_info->page = pagep;
2348
2349                 rxd = queue_head_node(rxq);
2350                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2351                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2352                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2353
2354                 /* Any space left in the current big page for another frag? */
2355                 if ((page_offset + rx_frag_size + rx_frag_size) >
2356                                         adapter->big_page_size) {
2357                         pagep = NULL;
2358                         page_info->last_frag = true;
2359                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2360                 } else {
2361                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2362                 }
2363
2364                 prev_page_info = page_info;
2365                 queue_head_inc(rxq);
2366                 page_info = &rxo->page_info_tbl[rxq->head];
2367         }
2368
2369         /* Mark the last frag of a page when we break out of the above loop
2370          * with no more slots available in the RXQ
2371          */
2372         if (pagep) {
2373                 prev_page_info->last_frag = true;
2374                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2375         }
2376
2377         if (posted) {
2378                 atomic_add(posted, &rxq->used);
2379                 if (rxo->rx_post_starved)
2380                         rxo->rx_post_starved = false;
2381                 do {
2382                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2383                         be_rxq_notify(adapter, rxq->id, notify);
2384                         posted -= notify;
2385                 } while (posted);
2386         } else if (atomic_read(&rxq->used) == 0) {
2387                 /* Let be_worker replenish when memory is available */
2388                 rxo->rx_post_starved = true;
2389         }
2390 }
2391
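/* Return the next valid TX completion from the TX CQ, or NULL if none is
 * pending.
 */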
2392 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2393 {
2394         struct be_queue_info *tx_cq = &txo->cq;
2395         struct be_tx_compl_info *txcp = &txo->txcp;
2396         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2397
2398         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2399                 return NULL;
2400
2401         /* Ensure load ordering of valid bit dword and other dwords below */
2402         rmb();
2403         be_dws_le_to_cpu(compl, sizeof(*compl));
2404
2405         txcp->status = GET_TX_COMPL_BITS(status, compl);
2406         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2407
2408         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2409         queue_tail_inc(tx_cq);
2410         return txcp;
2411 }
2412
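/* Unmap the WRBs of the TX request(s) completed up to last_index and free the
 * corresponding skb(s); returns the number of WRBs processed so the caller
 * can adjust the TXQ accounting.
 */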
2413 static u16 be_tx_compl_process(struct be_adapter *adapter,
2414                                struct be_tx_obj *txo, u16 last_index)
2415 {
2416         struct sk_buff **sent_skbs = txo->sent_skb_list;
2417         struct be_queue_info *txq = &txo->q;
2418         struct sk_buff *skb = NULL;
2419         bool unmap_skb_hdr = false;
2420         struct be_eth_wrb *wrb;
2421         u16 num_wrbs = 0;
2422         u32 frag_index;
2423
2424         do {
2425                 if (sent_skbs[txq->tail]) {
2426                         /* Free skb from prev req */
2427                         if (skb)
2428                                 dev_consume_skb_any(skb);
2429                         skb = sent_skbs[txq->tail];
2430                         sent_skbs[txq->tail] = NULL;
2431                         queue_tail_inc(txq);  /* skip hdr wrb */
2432                         num_wrbs++;
2433                         unmap_skb_hdr = true;
2434                 }
2435                 wrb = queue_tail_node(txq);
2436                 frag_index = txq->tail;
2437                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2438                               (unmap_skb_hdr && skb_headlen(skb)));
2439                 unmap_skb_hdr = false;
2440                 queue_tail_inc(txq);
2441                 num_wrbs++;
2442         } while (frag_index != last_index);
2443         dev_consume_skb_any(skb);
2444
2445         return num_wrbs;
2446 }
2447
2448 /* Return the number of events in the event queue */
2449 static inline int events_get(struct be_eq_obj *eqo)
2450 {
2451         struct be_eq_entry *eqe;
2452         int num = 0;
2453
2454         do {
2455                 eqe = queue_tail_node(&eqo->q);
2456                 if (eqe->evt == 0)
2457                         break;
2458
2459                 rmb();
2460                 eqe->evt = 0;
2461                 num++;
2462                 queue_tail_inc(&eqo->q);
2463         } while (true);
2464
2465         return num;
2466 }
2467
2468 /* Leaves the EQ in disarmed state */
2469 static void be_eq_clean(struct be_eq_obj *eqo)
2470 {
2471         int num = events_get(eqo);
2472
2473         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2474 }
2475
2476 /* Free posted rx buffers that were not used */
2477 static void be_rxq_clean(struct be_rx_obj *rxo)
2478 {
2479         struct be_queue_info *rxq = &rxo->q;
2480         struct be_rx_page_info *page_info;
2481
2482         while (atomic_read(&rxq->used) > 0) {
2483                 page_info = get_rx_page_info(rxo);
2484                 put_page(page_info->page);
2485                 memset(page_info, 0, sizeof(*page_info));
2486         }
2487         BUG_ON(atomic_read(&rxq->used));
2488         rxq->tail = 0;
2489         rxq->head = 0;
2490 }
2491
2492 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2493 {
2494         struct be_queue_info *rx_cq = &rxo->cq;
2495         struct be_rx_compl_info *rxcp;
2496         struct be_adapter *adapter = rxo->adapter;
2497         int flush_wait = 0;
2498
2499         /* Consume pending rx completions.
2500          * Wait for the flush completion (identified by zero num_rcvd)
2501          * to arrive. Notify CQ even when there are no more CQ entries
2502          * for HW to flush partially coalesced CQ entries.
2503          * In Lancer, there is no need to wait for flush compl.
2504          */
2505         for (;;) {
2506                 rxcp = be_rx_compl_get(rxo);
2507                 if (!rxcp) {
2508                         if (lancer_chip(adapter))
2509                                 break;
2510
2511                         if (flush_wait++ > 50 ||
2512                             be_check_error(adapter,
2513                                            BE_ERROR_HW)) {
2514                                 dev_warn(&adapter->pdev->dev,
2515                                          "did not receive flush compl\n");
2516                                 break;
2517                         }
2518                         be_cq_notify(adapter, rx_cq->id, true, 0);
2519                         mdelay(1);
2520                 } else {
2521                         be_rx_compl_discard(rxo, rxcp);
2522                         be_cq_notify(adapter, rx_cq->id, false, 1);
2523                         if (rxcp->num_rcvd == 0)
2524                                 break;
2525                 }
2526         }
2527
2528         /* After cleanup, leave the CQ in unarmed state */
2529         be_cq_notify(adapter, rx_cq->id, false, 0);
2530 }
2531
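/* Drain pending TX completions on all TX queues (polling until the HW has
 * been silent for ~10ms), then free any enqueued requests that were never
 * notified to the HW and reset the TXQ indices.
 */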
2532 static void be_tx_compl_clean(struct be_adapter *adapter)
2533 {
2534         struct device *dev = &adapter->pdev->dev;
2535         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2536         struct be_tx_compl_info *txcp;
2537         struct be_queue_info *txq;
2538         u32 end_idx, notified_idx;
2539         struct be_tx_obj *txo;
2540         int i, pending_txqs;
2541
2542         /* Stop polling for compls when HW has been silent for 10ms */
2543         do {
2544                 pending_txqs = adapter->num_tx_qs;
2545
2546                 for_all_tx_queues(adapter, txo, i) {
2547                         cmpl = 0;
2548                         num_wrbs = 0;
2549                         txq = &txo->q;
2550                         while ((txcp = be_tx_compl_get(txo))) {
2551                                 num_wrbs +=
2552                                         be_tx_compl_process(adapter, txo,
2553                                                             txcp->end_index);
2554                                 cmpl++;
2555                         }
2556                         if (cmpl) {
2557                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2558                                 atomic_sub(num_wrbs, &txq->used);
2559                                 timeo = 0;
2560                         }
2561                         if (!be_is_tx_compl_pending(txo))
2562                                 pending_txqs--;
2563                 }
2564
2565                 if (pending_txqs == 0 || ++timeo > 10 ||
2566                     be_check_error(adapter, BE_ERROR_HW))
2567                         break;
2568
2569                 mdelay(1);
2570         } while (true);
2571
2572         /* Free enqueued TX that was never notified to HW */
2573         for_all_tx_queues(adapter, txo, i) {
2574                 txq = &txo->q;
2575
2576                 if (atomic_read(&txq->used)) {
2577                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2578                                  i, atomic_read(&txq->used));
2579                         notified_idx = txq->tail;
2580                         end_idx = txq->tail;
2581                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2582                                   txq->len);
2583                         /* Use the tx-compl process logic to handle requests
2584                          * that were not sent to the HW.
2585                          */
2586                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2587                         atomic_sub(num_wrbs, &txq->used);
2588                         BUG_ON(atomic_read(&txq->used));
2589                         txo->pend_wrb_cnt = 0;
2590                         /* Since hw was never notified of these requests,
2591                          * reset TXQ indices
2592                          */
2593                         txq->head = notified_idx;
2594                         txq->tail = notified_idx;
2595                 }
2596         }
2597 }
2598
2599 static void be_evt_queues_destroy(struct be_adapter *adapter)
2600 {
2601         struct be_eq_obj *eqo;
2602         int i;
2603
2604         for_all_evt_queues(adapter, eqo, i) {
2605                 if (eqo->q.created) {
2606                         be_eq_clean(eqo);
2607                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2608                         napi_hash_del(&eqo->napi);
2609                         netif_napi_del(&eqo->napi);
2610                         free_cpumask_var(eqo->affinity_mask);
2611                 }
2612                 be_queue_free(adapter, &eqo->q);
2613         }
2614 }
2615
2616 static int be_evt_queues_create(struct be_adapter *adapter)
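/* Allocate and create enough event queues to service both the RX and TX
 * queues, and set up adaptive coalescing defaults, CPU affinity hints and
 * NAPI for each EQ.
 */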
2617 {
2618         struct be_queue_info *eq;
2619         struct be_eq_obj *eqo;
2620         struct be_aic_obj *aic;
2621         int i, rc;
2622
2623         /* need enough EQs to service both RX and TX queues */
2624         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2625                                     max(adapter->cfg_num_rx_irqs,
2626                                         adapter->cfg_num_tx_irqs));
2627
2628         for_all_evt_queues(adapter, eqo, i) {
2629                 int numa_node = dev_to_node(&adapter->pdev->dev);
2630
2631                 aic = &adapter->aic_obj[i];
2632                 eqo->adapter = adapter;
2633                 eqo->idx = i;
2634                 aic->max_eqd = BE_MAX_EQD;
2635                 aic->enable = true;
2636
2637                 eq = &eqo->q;
2638                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2639                                     sizeof(struct be_eq_entry));
2640                 if (rc)
2641                         return rc;
2642
2643                 rc = be_cmd_eq_create(adapter, eqo);
2644                 if (rc)
2645                         return rc;
2646
2647                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2648                         return -ENOMEM;
2649                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2650                                 eqo->affinity_mask);
2651                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2652                                BE_NAPI_WEIGHT);
2653         }
2654         return 0;
2655 }
2656
2657 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2658 {
2659         struct be_queue_info *q;
2660
2661         q = &adapter->mcc_obj.q;
2662         if (q->created)
2663                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2664         be_queue_free(adapter, q);
2665
2666         q = &adapter->mcc_obj.cq;
2667         if (q->created)
2668                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2669         be_queue_free(adapter, q);
2670 }
2671
2672 /* Must be called only after TX qs are created as MCC shares TX EQ */
2673 static int be_mcc_queues_create(struct be_adapter *adapter)
2674 {
2675         struct be_queue_info *q, *cq;
2676
2677         cq = &adapter->mcc_obj.cq;
2678         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2679                            sizeof(struct be_mcc_compl)))
2680                 goto err;
2681
2682         /* Use the default EQ for MCC completions */
2683         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2684                 goto mcc_cq_free;
2685
2686         q = &adapter->mcc_obj.q;
2687         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2688                 goto mcc_cq_destroy;
2689
2690         if (be_cmd_mccq_create(adapter, q, cq))
2691                 goto mcc_q_free;
2692
2693         return 0;
2694
2695 mcc_q_free:
2696         be_queue_free(adapter, q);
2697 mcc_cq_destroy:
2698         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2699 mcc_cq_free:
2700         be_queue_free(adapter, cq);
2701 err:
2702         return -1;
2703 }
2704
2705 static void be_tx_queues_destroy(struct be_adapter *adapter)
2706 {
2707         struct be_queue_info *q;
2708         struct be_tx_obj *txo;
2709         u8 i;
2710
2711         for_all_tx_queues(adapter, txo, i) {
2712                 q = &txo->q;
2713                 if (q->created)
2714                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2715                 be_queue_free(adapter, q);
2716
2717                 q = &txo->cq;
2718                 if (q->created)
2719                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2720                 be_queue_free(adapter, q);
2721         }
2722 }
2723
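/* Create the TX completion queues and TX queues. TX queues share EQs with the
 * RX side when fewer EQs than TX queues are available, and XPS is configured
 * from each EQ's affinity mask.
 */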
2724 static int be_tx_qs_create(struct be_adapter *adapter)
2725 {
2726         struct be_queue_info *cq;
2727         struct be_tx_obj *txo;
2728         struct be_eq_obj *eqo;
2729         int status, i;
2730
2731         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2732
2733         for_all_tx_queues(adapter, txo, i) {
2734                 cq = &txo->cq;
2735                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2736                                         sizeof(struct be_eth_tx_compl));
2737                 if (status)
2738                         return status;
2739
2740                 u64_stats_init(&txo->stats.sync);
2741                 u64_stats_init(&txo->stats.sync_compl);
2742
2743                 /* If num_evt_qs is less than num_tx_qs, then more than
2744                  * one txq shares an eq
2745                  */
2746                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2747                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2748                 if (status)
2749                         return status;
2750
2751                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2752                                         sizeof(struct be_eth_wrb));
2753                 if (status)
2754                         return status;
2755
2756                 status = be_cmd_txq_create(adapter, txo);
2757                 if (status)
2758                         return status;
2759
2760                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2761                                     eqo->idx);
2762         }
2763
2764         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2765                  adapter->num_tx_qs);
2766         return 0;
2767 }
2768
2769 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2770 {
2771         struct be_queue_info *q;
2772         struct be_rx_obj *rxo;
2773         int i;
2774
2775         for_all_rx_queues(adapter, rxo, i) {
2776                 q = &rxo->cq;
2777                 if (q->created)
2778                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2779                 be_queue_free(adapter, q);
2780         }
2781 }
2782
2783 static int be_rx_cqs_create(struct be_adapter *adapter)
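/* Create the RX completion queues: one per RSS ring plus an optional default
 * (non-RSS) RXQ, each attached to an EQ in round-robin fashion.
 */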
2784 {
2785         struct be_queue_info *eq, *cq;
2786         struct be_rx_obj *rxo;
2787         int rc, i;
2788
2789         adapter->num_rss_qs =
2790                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2791
2792         /* We'll use RSS only if at least 2 RSS rings are supported. */
2793         if (adapter->num_rss_qs < 2)
2794                 adapter->num_rss_qs = 0;
2795
2796         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2797
2798         /* When the interface is not capable of RSS rings (and there is no
2799          * need to create a default RXQ) we'll still need one RXQ
2800          */
2801         if (adapter->num_rx_qs == 0)
2802                 adapter->num_rx_qs = 1;
2803
2804         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2805         for_all_rx_queues(adapter, rxo, i) {
2806                 rxo->adapter = adapter;
2807                 cq = &rxo->cq;
2808                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
2809                                     sizeof(struct be_eth_rx_compl));
2810                 if (rc)
2811                         return rc;
2812
2813                 u64_stats_init(&rxo->stats.sync);
2814                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
2815                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
2816                 if (rc)
2817                         return rc;
2818         }
2819
2820         dev_info(&adapter->pdev->dev,
2821                  "created %d RX queue(s)\n", adapter->num_rx_qs);
2822         return 0;
2823 }
2824
2825 static irqreturn_t be_intx(int irq, void *dev)
2826 {
2827         struct be_eq_obj *eqo = dev;
2828         struct be_adapter *adapter = eqo->adapter;
2829         int num_evts = 0;
2830
2831         /* IRQ is not expected when NAPI is scheduled as the EQ
2832          * will not be armed.
2833          * But, this can happen on Lancer INTx where it takes
2834          * a while to de-assert INTx or in BE2 where occasionally
2835          * an interrupt may be raised even when EQ is unarmed.
2836          * If NAPI is already scheduled, then counting & notifying
2837          * events will orphan them.
2838          */
2839         if (napi_schedule_prep(&eqo->napi)) {
2840                 num_evts = events_get(eqo);
2841                 __napi_schedule(&eqo->napi);
2842                 if (num_evts)
2843                         eqo->spurious_intr = 0;
2844         }
2845         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
2846
2847         /* Return IRQ_HANDLED only for the first spurious intr
2848          * after a valid intr to stop the kernel from branding
2849          * this irq as a bad one!
2850          */
2851         if (num_evts || eqo->spurious_intr++ == 0)
2852                 return IRQ_HANDLED;
2853         else
2854                 return IRQ_NONE;
2855 }
2856
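     /* MSI-X ISR: clear the EQ interrupt without re-arming the EQ and defer
      * all event/completion processing to NAPI; be_poll() re-arms the EQ
      * once polling completes.
      */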
2857 static irqreturn_t be_msix(int irq, void *dev)
2858 {
2859         struct be_eq_obj *eqo = dev;
2860
2861         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
2862         napi_schedule(&eqo->napi);
2863         return IRQ_HANDLED;
2864 }
2865
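     /* GRO is attempted only for error-free TCP completions that passed the
      * L4 checksum check.
      */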
2866 static inline bool do_gro(struct be_rx_compl_info *rxcp)
2867 {
2868         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
2869 }
2870
2871 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
2872                          int budget, int polling)
2873 {
2874         struct be_adapter *adapter = rxo->adapter;
2875         struct be_queue_info *rx_cq = &rxo->cq;
2876         struct be_rx_compl_info *rxcp;
2877         u32 work_done;
2878         u32 frags_consumed = 0;
2879
2880         for (work_done = 0; work_done < budget; work_done++) {
2881                 rxcp = be_rx_compl_get(rxo);
2882                 if (!rxcp)
2883                         break;
2884
2885                 /* Is it a flush compl that has no data */
2886                 if (unlikely(rxcp->num_rcvd == 0))
2887                         goto loop_continue;
2888
2889                 /* Discard compl with partial DMA on Lancer B0 */
2890                 if (unlikely(!rxcp->pkt_size)) {
2891                         be_rx_compl_discard(rxo, rxcp);
2892                         goto loop_continue;
2893                 }
2894
2895                 /* On BE, drop pkts that arrive due to imperfect filtering in
2896                  * promiscuous mode on some SKUs
2897                  */
2898                 if (unlikely(rxcp->port != adapter->port_num &&
2899                              !lancer_chip(adapter))) {
2900                         be_rx_compl_discard(rxo, rxcp);
2901                         goto loop_continue;
2902                 }
2903
2904                 /* Don't do gro when we're busy_polling */
2905                 if (do_gro(rxcp) && polling != BUSY_POLLING)
2906                         be_rx_compl_process_gro(rxo, napi, rxcp);
2907                 else
2908                         be_rx_compl_process(rxo, napi, rxcp);
2909
2910 loop_continue:
2911                 frags_consumed += rxcp->num_rcvd;
2912                 be_rx_stats_update(rxo, rxcp);
2913         }
2914
2915         if (work_done) {
2916                 be_cq_notify(adapter, rx_cq->id, true, work_done);
2917
2918                 /* When an rx-obj gets into post_starved state, just
2919                  * let be_worker do the posting.
2920                  */
2921                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
2922                     !rxo->rx_post_starved)
2923                         be_post_rx_frags(rxo, GFP_ATOMIC,
2924                                          max_t(u32, MAX_RX_POST,
2925                                                frags_consumed));
2926         }
2927
2928         return work_done;
2929 }
2930
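     /* Map TX completion error codes to the per-TXQ error counters
      * (non-Lancer encodings here, Lancer encodings in
      * lancer_update_tx_err() below).
      */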
2931 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2932 {
2933         switch (status) {
2934         case BE_TX_COMP_HDR_PARSE_ERR:
2935                 tx_stats(txo)->tx_hdr_parse_err++;
2936                 break;
2937         case BE_TX_COMP_NDMA_ERR:
2938                 tx_stats(txo)->tx_dma_err++;
2939                 break;
2940         case BE_TX_COMP_ACL_ERR:
2941                 tx_stats(txo)->tx_spoof_check_err++;
2942                 break;
2943         }
2944 }
2945
2946 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2947 {
2948         switch (status) {
2949         case LANCER_TX_COMP_LSO_ERR:
2950                 tx_stats(txo)->tx_tso_err++;
2951                 break;
2952         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2953         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2954                 tx_stats(txo)->tx_spoof_check_err++;
2955                 break;
2956         case LANCER_TX_COMP_QINQ_ERR:
2957                 tx_stats(txo)->tx_qinq_err++;
2958                 break;
2959         case LANCER_TX_COMP_PARITY_ERR:
2960                 tx_stats(txo)->tx_internal_parity_err++;
2961                 break;
2962         case LANCER_TX_COMP_DMA_ERR:
2963                 tx_stats(txo)->tx_dma_err++;
2964                 break;
2965         }
2966 }
2967
2968 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
2969                           int idx)
2970 {
2971         int num_wrbs = 0, work_done = 0;
2972         struct be_tx_compl_info *txcp;
2973
2974         while ((txcp = be_tx_compl_get(txo))) {
2975                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
2976                 work_done++;
2977
2978                 if (txcp->status) {
2979                         if (lancer_chip(adapter))
2980                                 lancer_update_tx_err(txo, txcp->status);
2981                         else
2982                                 be_update_tx_err(txo, txcp->status);
2983                 }
2984         }
2985
2986         if (work_done) {
2987                 be_cq_notify(adapter, txo->cq.id, true, work_done);
2988                 atomic_sub(num_wrbs, &txo->q.used);
2989
2990                 /* As Tx wrbs have been freed up, wake up netdev queue
2991                  * if it was stopped due to lack of tx wrbs.  */
2992                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
2993                     be_can_txq_wake(txo)) {
2994                         netif_wake_subqueue(adapter->netdev, idx);
2995                 }
2996
2997                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
2998                 tx_stats(txo)->tx_compl += work_done;
2999                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3000         }
3001 }
3002
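     /* eqo->lock and eqo->state arbitrate between NAPI and busy-poll so that
      * only one of them processes an EQ's RX queues at a time; the loser of
      * a race records a *_YIELD state instead of blocking.
      */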
3003 #ifdef CONFIG_NET_RX_BUSY_POLL
3004 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3005 {
3006         bool status = true;
3007
3008         spin_lock(&eqo->lock); /* BH is already disabled */
3009         if (eqo->state & BE_EQ_LOCKED) {
3010                 WARN_ON(eqo->state & BE_EQ_NAPI);
3011                 eqo->state |= BE_EQ_NAPI_YIELD;
3012                 status = false;
3013         } else {
3014                 eqo->state = BE_EQ_NAPI;
3015         }
3016         spin_unlock(&eqo->lock);
3017         return status;
3018 }
3019
3020 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3021 {
3022         spin_lock(&eqo->lock); /* BH is already disabled */
3023
3024         WARN_ON(eqo->state & (BE_EQ_POLL | BE_EQ_NAPI_YIELD));
3025         eqo->state = BE_EQ_IDLE;
3026
3027         spin_unlock(&eqo->lock);
3028 }
3029
3030 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3031 {
3032         bool status = true;
3033
3034         spin_lock_bh(&eqo->lock);
3035         if (eqo->state & BE_EQ_LOCKED) {
3036                 eqo->state |= BE_EQ_POLL_YIELD;
3037                 status = false;
3038         } else {
3039                 eqo->state |= BE_EQ_POLL;
3040         }
3041         spin_unlock_bh(&eqo->lock);
3042         return status;
3043 }
3044
3045 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3046 {
3047         spin_lock_bh(&eqo->lock);
3048
3049         WARN_ON(eqo->state & (BE_EQ_NAPI));
3050         eqo->state = BE_EQ_IDLE;
3051
3052         spin_unlock_bh(&eqo->lock);
3053 }
3054
3055 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3056 {
3057         spin_lock_init(&eqo->lock);
3058         eqo->state = BE_EQ_IDLE;
3059 }
3060
3061 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3062 {
3063         local_bh_disable();
3064
3065         /* It's enough to just acquire napi lock on the eqo to stop
3066          * be_busy_poll() from processing any queues.
3067          */
3068         while (!be_lock_napi(eqo))
3069                 mdelay(1);
3070
3071         local_bh_enable();
3072 }
3073
3074 #else /* CONFIG_NET_RX_BUSY_POLL */
3075
3076 static inline bool be_lock_napi(struct be_eq_obj *eqo)
3077 {
3078         return true;
3079 }
3080
3081 static inline void be_unlock_napi(struct be_eq_obj *eqo)
3082 {
3083 }
3084
3085 static inline bool be_lock_busy_poll(struct be_eq_obj *eqo)
3086 {
3087         return false;
3088 }
3089
3090 static inline void be_unlock_busy_poll(struct be_eq_obj *eqo)
3091 {
3092 }
3093
3094 static inline void be_enable_busy_poll(struct be_eq_obj *eqo)
3095 {
3096 }
3097
3098 static inline void be_disable_busy_poll(struct be_eq_obj *eqo)
3099 {
3100 }
3101 #endif /* CONFIG_NET_RX_BUSY_POLL */
3102
3103 int be_poll(struct napi_struct *napi, int budget)
3104 {
3105         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3106         struct be_adapter *adapter = eqo->adapter;
3107         int max_work = 0, work, i, num_evts;
3108         struct be_rx_obj *rxo;
3109         struct be_tx_obj *txo;
3110         u32 mult_enc = 0;
3111
3112         num_evts = events_get(eqo);
3113
3114         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3115                 be_process_tx(adapter, txo, i);
3116
3117         if (be_lock_napi(eqo)) {
3118                 /* This loop iterates twice for EQ0, on which
3119                  * completions of the last RXQ (the default one) are also
3120                  * processed. For other EQs the loop iterates only once.
3121                  */
3122                 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3123                         work = be_process_rx(rxo, napi, budget, NAPI_POLLING);
3124                         max_work = max(work, max_work);
3125                 }
3126                 be_unlock_napi(eqo);
3127         } else {
3128                 max_work = budget;
3129         }
3130
3131         if (is_mcc_eqo(eqo))
3132                 be_process_mcc(adapter);
3133
3134         if (max_work < budget) {
3135                 napi_complete(napi);
3136
3137                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3138                  * delay via a delay multiplier encoding value
3139                  */
3140                 if (skyhawk_chip(adapter))
3141                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3142
3143                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3144                              mult_enc);
3145         } else {
3146                 /* As we'll continue in polling mode, count and clear events */
3147                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3148         }
3149         return max_work;
3150 }
3151
3152 #ifdef CONFIG_NET_RX_BUSY_POLL
3153 static int be_busy_poll(struct napi_struct *napi)
3154 {
3155         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3156         struct be_adapter *adapter = eqo->adapter;
3157         struct be_rx_obj *rxo;
3158         int i, work = 0;
3159
3160         if (!be_lock_busy_poll(eqo))
3161                 return LL_FLUSH_BUSY;
3162
3163         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3164                 work = be_process_rx(rxo, napi, 4, BUSY_POLLING);
3165                 if (work)
3166                         break;
3167         }
3168
3169         be_unlock_busy_poll(eqo);
3170         return work;
3171 }
3172 #endif
3173
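     /* Detect unrecoverable HW errors: SLIPORT status registers on Lancer,
      * UE (Unrecoverable Error) status registers on BE/Skyhawk.
      */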
3174 void be_detect_error(struct be_adapter *adapter)
3175 {
3176         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3177         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3178         u32 i;
3179         struct device *dev = &adapter->pdev->dev;
3180
3181         if (be_check_error(adapter, BE_ERROR_HW))
3182                 return;
3183
3184         if (lancer_chip(adapter)) {
3185                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3186                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3187                         be_set_error(adapter, BE_ERROR_UE);
3188                         sliport_err1 = ioread32(adapter->db +
3189                                                 SLIPORT_ERROR1_OFFSET);
3190                         sliport_err2 = ioread32(adapter->db +
3191                                                 SLIPORT_ERROR2_OFFSET);
3192                         /* Do not log error messages if it's a FW reset */
3193                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3194                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3195                                 dev_info(dev, "Firmware update in progress\n");
3196                         } else {
3197                                 dev_err(dev, "Error detected in the card\n");
3198                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3199                                         sliport_status);
3200                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3201                                         sliport_err1);
3202                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3203                                         sliport_err2);
3204                         }
3205                 }
3206         } else {
3207                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3208                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3209                 ue_lo_mask = ioread32(adapter->pcicfg +
3210                                       PCICFG_UE_STATUS_LOW_MASK);
3211                 ue_hi_mask = ioread32(adapter->pcicfg +
3212                                       PCICFG_UE_STATUS_HI_MASK);
3213
3214                 ue_lo = (ue_lo & ~ue_lo_mask);
3215                 ue_hi = (ue_hi & ~ue_hi_mask);
3216
3217                 /* On certain platforms BE hardware can indicate spurious UEs.
3218                  * In case of a real UE the HW stops working anyway, so the
3219                  * hw_error flag is not set here on UE detection for BE chips.
3220                  */
3221
3222                 if (ue_lo || ue_hi) {
3223                         dev_err(dev,
3224                                 "Unrecoverable Error detected in the adapter");
3225                         dev_err(dev, "Please reboot server to recover");
3226                         if (skyhawk_chip(adapter))
3227                                 be_set_error(adapter, BE_ERROR_UE);
3228
3229                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3230                                 if (ue_lo & 1)
3231                                         dev_err(dev, "UE: %s bit set\n",
3232                                                 ue_status_low_desc[i]);
3233                         }
3234                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3235                                 if (ue_hi & 1)
3236                                         dev_err(dev, "UE: %s bit set\n",
3237                                                 ue_status_hi_desc[i]);
3238                         }
3239                 }
3240         }
3241 }
3242
3243 static void be_msix_disable(struct be_adapter *adapter)
3244 {
3245         if (msix_enabled(adapter)) {
3246                 pci_disable_msix(adapter->pdev);
3247                 adapter->num_msix_vec = 0;
3248                 adapter->num_msix_roce_vec = 0;
3249         }
3250 }
3251
3252 static int be_msix_enable(struct be_adapter *adapter)
3253 {
3254         unsigned int i, max_roce_eqs;
3255         struct device *dev = &adapter->pdev->dev;
3256         int num_vec;
3257
3258         /* If RoCE is supported, program the max number of vectors that
3259          * could be used for NIC and RoCE, else, just program the number
3260          * we'll use initially.
3261          */
3262         if (be_roce_supported(adapter)) {
3263                 max_roce_eqs =
3264                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3265                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3266                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3267         } else {
3268                 num_vec = max(adapter->cfg_num_rx_irqs,
3269                               adapter->cfg_num_tx_irqs);
3270         }
3271
3272         for (i = 0; i < num_vec; i++)
3273                 adapter->msix_entries[i].entry = i;
3274
3275         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3276                                         MIN_MSIX_VECTORS, num_vec);
3277         if (num_vec < 0)
3278                 goto fail;
3279
3280         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3281                 adapter->num_msix_roce_vec = num_vec / 2;
3282                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3283                          adapter->num_msix_roce_vec);
3284         }
3285
3286         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3287
3288         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3289                  adapter->num_msix_vec);
3290         return 0;
3291
3292 fail:
3293         dev_warn(dev, "MSIx enable failed\n");
3294
3295         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3296         if (be_virtfn(adapter))
3297                 return num_vec;
3298         return 0;
3299 }
3300
3301 static inline int be_msix_vec_get(struct be_adapter *adapter,
3302                                   struct be_eq_obj *eqo)
3303 {
3304         return adapter->msix_entries[eqo->msix_idx].vector;
3305 }
3306
3307 static int be_msix_register(struct be_adapter *adapter)
3308 {
3309         struct net_device *netdev = adapter->netdev;
3310         struct be_eq_obj *eqo;
3311         int status, i, vec;
3312
3313         for_all_evt_queues(adapter, eqo, i) {
3314                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3315                 vec = be_msix_vec_get(adapter, eqo);
3316                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3317                 if (status)
3318                         goto err_msix;
3319
3320                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3321         }
3322
3323         return 0;
3324 err_msix:
3325         for (i--; i >= 0; i--) {
3326                 eqo = &adapter->eq_obj[i];
3327                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3328         }
3329         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3330                  status);
3331         be_msix_disable(adapter);
3332         return status;
3333 }
3334
3335 static int be_irq_register(struct be_adapter *adapter)
3336 {
3337         struct net_device *netdev = adapter->netdev;
3338         int status;
3339
3340         if (msix_enabled(adapter)) {
3341                 status = be_msix_register(adapter);
3342                 if (status == 0)
3343                         goto done;
3344                 /* INTx is not supported for VF */
3345                 if (be_virtfn(adapter))
3346                         return status;
3347         }
3348
3349         /* INTx: only the first EQ is used */
3350         netdev->irq = adapter->pdev->irq;
3351         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3352                              &adapter->eq_obj[0]);
3353         if (status) {
3354                 dev_err(&adapter->pdev->dev,
3355                         "INTx request IRQ failed - err %d\n", status);
3356                 return status;
3357         }
3358 done:
3359         adapter->isr_registered = true;
3360         return 0;
3361 }
3362
3363 static void be_irq_unregister(struct be_adapter *adapter)
3364 {
3365         struct net_device *netdev = adapter->netdev;
3366         struct be_eq_obj *eqo;
3367         int i, vec;
3368
3369         if (!adapter->isr_registered)
3370                 return;
3371
3372         /* INTx */
3373         if (!msix_enabled(adapter)) {
3374                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3375                 goto done;
3376         }
3377
3378         /* MSIx */
3379         for_all_evt_queues(adapter, eqo, i) {
3380                 vec = be_msix_vec_get(adapter, eqo);
3381                 irq_set_affinity_hint(vec, NULL);
3382                 free_irq(vec, eqo);
3383         }
3384
3385 done:
3386         adapter->isr_registered = false;
3387 }
3388
3389 static void be_rx_qs_destroy(struct be_adapter *adapter)
3390 {
3391         struct rss_info *rss = &adapter->rss_info;
3392         struct be_queue_info *q;
3393         struct be_rx_obj *rxo;
3394         int i;
3395
3396         for_all_rx_queues(adapter, rxo, i) {
3397                 q = &rxo->q;
3398                 if (q->created) {
3399                         /* If RXQs are destroyed while in an "out of buffer"
3400                          * state, there is a possibility of an HW stall on
3401                          * Lancer. So, post 64 buffers to each queue to relieve
3402                          * the "out of buffer" condition.
3403                          * Make sure there's space in the RXQ before posting.
3404                          */
3405                         if (lancer_chip(adapter)) {
3406                                 be_rx_cq_clean(rxo);
3407                                 if (atomic_read(&q->used) == 0)
3408                                         be_post_rx_frags(rxo, GFP_KERNEL,
3409                                                          MAX_RX_POST);
3410                         }
3411
3412                         be_cmd_rxq_destroy(adapter, q);
3413                         be_rx_cq_clean(rxo);
3414                         be_rxq_clean(rxo);
3415                 }
3416                 be_queue_free(adapter, q);
3417         }
3418
3419         if (rss->rss_flags) {
3420                 rss->rss_flags = RSS_ENABLE_NONE;
3421                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3422                                   128, rss->rss_hkey);
3423         }
3424 }
3425
3426 static void be_disable_if_filters(struct be_adapter *adapter)
3427 {
3428         be_cmd_pmac_del(adapter, adapter->if_handle,
3429                         adapter->pmac_id[0], 0);
3430
3431         be_clear_uc_list(adapter);
3432
3433         /* The IFACE flags are enabled in the open path and cleared
3434          * in the close path. When a VF gets detached from the host and
3435          * assigned to a VM the following happens:
3436          *      - VF's IFACE flags get cleared in the detach path
3437          *      - IFACE create is issued by the VF in the attach path
3438          * Due to a bug in the BE3/Skyhawk-R FW
3439          * (Lancer FW doesn't have the bug), the IFACE capability flags
3440          * specified along with the IFACE create cmd issued by a VF are not
3441          * honoured by FW.  As a consequence, if a *new* driver
3442          * (that enables/disables IFACE flags in open/close)
3443          * is loaded in the host and an *old* driver is used by a VM/VF,
3444          * the IFACE gets created *without* the needed flags.
3445          * To avoid this, disable RX-filter flags only for Lancer.
3446          */
3447         if (lancer_chip(adapter)) {
3448                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3449                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3450         }
3451 }
3452
3453 static int be_close(struct net_device *netdev)
3454 {
3455         struct be_adapter *adapter = netdev_priv(netdev);
3456         struct be_eq_obj *eqo;
3457         int i;
3458
3459         /* This protection is needed as be_close() may be called even when the
3460          * adapter is in cleared state (after eeh perm failure)
3461          */
3462         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3463                 return 0;
3464
3465         be_disable_if_filters(adapter);
3466
3467         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3468                 for_all_evt_queues(adapter, eqo, i) {
3469                         napi_disable(&eqo->napi);
3470                         be_disable_busy_poll(eqo);
3471                 }
3472                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3473         }
3474
3475         be_async_mcc_disable(adapter);
3476
3477         /* Wait for all pending tx completions to arrive so that
3478          * all tx skbs are freed.
3479          */
3480         netif_tx_disable(netdev);
3481         be_tx_compl_clean(adapter);
3482
3483         be_rx_qs_destroy(adapter);
3484
3485         for_all_evt_queues(adapter, eqo, i) {
3486                 if (msix_enabled(adapter))
3487                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3488                 else
3489                         synchronize_irq(netdev->irq);
3490                 be_eq_clean(eqo);
3491         }
3492
3493         be_irq_unregister(adapter);
3494
3495         return 0;
3496 }
3497
3498 static int be_rx_qs_create(struct be_adapter *adapter)
3499 {
3500         struct rss_info *rss = &adapter->rss_info;
3501         u8 rss_key[RSS_HASH_KEY_LEN];
3502         struct be_rx_obj *rxo;
3503         int rc, i, j;
3504
3505         for_all_rx_queues(adapter, rxo, i) {
3506                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3507                                     sizeof(struct be_eth_rx_d));
3508                 if (rc)
3509                         return rc;
3510         }
3511
3512         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3513                 rxo = default_rxo(adapter);
3514                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3515                                        rx_frag_size, adapter->if_handle,
3516                                        false, &rxo->rss_id);
3517                 if (rc)
3518                         return rc;
3519         }
3520
3521         for_all_rss_queues(adapter, rxo, i) {
3522                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3523                                        rx_frag_size, adapter->if_handle,
3524                                        true, &rxo->rss_id);
3525                 if (rc)
3526                         return rc;
3527         }
3528
3529         if (be_multi_rxq(adapter)) {
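                     /* Fill the RSS indirection table by assigning the
                      * RSS-queue ids round-robin across all
                      * RSS_INDIR_TABLE_LEN entries.
                      */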
3530                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3531                         for_all_rss_queues(adapter, rxo, i) {
3532                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3533                                         break;
3534                                 rss->rsstable[j + i] = rxo->rss_id;
3535                                 rss->rss_queue[j + i] = i;
3536                         }
3537                 }
3538                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3539                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3540
3541                 if (!BEx_chip(adapter))
3542                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3543                                 RSS_ENABLE_UDP_IPV6;
3544
3545                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3546                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3547                                        RSS_INDIR_TABLE_LEN, rss_key);
3548                 if (rc) {
3549                         rss->rss_flags = RSS_ENABLE_NONE;
3550                         return rc;
3551                 }
3552
3553                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3554         } else {
3555                 /* Disable RSS, if only default RX Q is created */
3556                 rss->rss_flags = RSS_ENABLE_NONE;
3557         }
3558
3559
3560         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3561          * which is a queue empty condition
3562          */
3563         for_all_rx_queues(adapter, rxo, i)
3564                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3565
3566         return 0;
3567 }
3568
3569 static int be_enable_if_filters(struct be_adapter *adapter)
3570 {
3571         int status;
3572
3573         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3574         if (status)
3575                 return status;
3576
3577         /* For BE3 VFs, the PF programs the initial MAC address */
3578         if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
3579                 status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
3580                                          adapter->if_handle,
3581                                          &adapter->pmac_id[0], 0);
3582                 if (status)
3583                         return status;
3584         }
3585
3586         if (adapter->vlans_added)
3587                 be_vid_config(adapter);
3588
3589         be_set_rx_mode(adapter->netdev);
3590
3591         return 0;
3592 }
3593
3594 static int be_open(struct net_device *netdev)
3595 {
3596         struct be_adapter *adapter = netdev_priv(netdev);
3597         struct be_eq_obj *eqo;
3598         struct be_rx_obj *rxo;
3599         struct be_tx_obj *txo;
3600         u8 link_status;
3601         int status, i;
3602
3603         status = be_rx_qs_create(adapter);
3604         if (status)
3605                 goto err;
3606
3607         status = be_enable_if_filters(adapter);
3608         if (status)
3609                 goto err;
3610
3611         status = be_irq_register(adapter);
3612         if (status)
3613                 goto err;
3614
3615         for_all_rx_queues(adapter, rxo, i)
3616                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3617
3618         for_all_tx_queues(adapter, txo, i)
3619                 be_cq_notify(adapter, txo->cq.id, true, 0);
3620
3621         be_async_mcc_enable(adapter);
3622
3623         for_all_evt_queues(adapter, eqo, i) {
3624                 napi_enable(&eqo->napi);
3625                 be_enable_busy_poll(eqo);
3626                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3627         }
3628         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3629
3630         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3631         if (!status)
3632                 be_link_status_update(adapter, link_status);
3633
3634         netif_tx_start_all_queues(netdev);
3635         if (skyhawk_chip(adapter))
3636                 udp_tunnel_get_rx_info(netdev);
3637
3638         return 0;
3639 err:
3640         be_close(adapter->netdev);
3641         return -EIO;
3642 }
3643
3644 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3645 {
3646         u32 addr;
3647
3648         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3649
3650         mac[5] = (u8)(addr & 0xFF);
3651         mac[4] = (u8)((addr >> 8) & 0xFF);
3652         mac[3] = (u8)((addr >> 16) & 0xFF);
3653         /* Use the OUI from the current MAC address */
3654         memcpy(mac, adapter->netdev->dev_addr, 3);
3655 }
3656
3657 /*
3658  * Generate a seed MAC address from the PF MAC Address using jhash.
3659  * MAC addresses for VFs are assigned incrementally starting from the seed.
3660  * These addresses are programmed in the ASIC by the PF and the VF driver
3661  * queries for the MAC address during its probe.
3662  */
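     /* Illustrative example (addresses hypothetical): if the PF MAC is
      * 00:00:c9:12:34:56 and the jhash of it yields low bytes ab:cd:ef, the
      * seed becomes 00:00:c9:ab:cd:ef; VF0, VF1, ... then get
      * 00:00:c9:ab:cd:ef, 00:00:c9:ab:cd:f0, and so on (only mac[5] is
      * incremented).
      */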
3663 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3664 {
3665         u32 vf;
3666         int status = 0;
3667         u8 mac[ETH_ALEN];
3668         struct be_vf_cfg *vf_cfg;
3669
3670         be_vf_eth_addr_generate(adapter, mac);
3671
3672         for_all_vfs(adapter, vf_cfg, vf) {
3673                 if (BEx_chip(adapter))
3674                         status = be_cmd_pmac_add(adapter, mac,
3675                                                  vf_cfg->if_handle,
3676                                                  &vf_cfg->pmac_id, vf + 1);
3677                 else
3678                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3679                                                 vf + 1);
3680
3681                 if (status)
3682                         dev_err(&adapter->pdev->dev,
3683                                 "Mac address assignment failed for VF %d\n",
3684                                 vf);
3685                 else
3686                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3687
3688                 mac[5] += 1;
3689         }
3690         return status;
3691 }
3692
3693 static int be_vfs_mac_query(struct be_adapter *adapter)
3694 {
3695         int status, vf;
3696         u8 mac[ETH_ALEN];
3697         struct be_vf_cfg *vf_cfg;
3698
3699         for_all_vfs(adapter, vf_cfg, vf) {
3700                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3701                                                mac, vf_cfg->if_handle,
3702                                                false, vf+1);
3703                 if (status)
3704                         return status;
3705                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3706         }
3707         return 0;
3708 }
3709
3710 static void be_vf_clear(struct be_adapter *adapter)
3711 {
3712         struct be_vf_cfg *vf_cfg;
3713         u32 vf;
3714
3715         if (pci_vfs_assigned(adapter->pdev)) {
3716                 dev_warn(&adapter->pdev->dev,
3717                          "VFs are assigned to VMs: not disabling VFs\n");
3718                 goto done;
3719         }
3720
3721         pci_disable_sriov(adapter->pdev);
3722
3723         for_all_vfs(adapter, vf_cfg, vf) {
3724                 if (BEx_chip(adapter))
3725                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3726                                         vf_cfg->pmac_id, vf + 1);
3727                 else
3728                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3729                                        vf + 1);
3730
3731                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3732         }
3733
3734         if (BE3_chip(adapter))
3735                 be_cmd_set_hsw_config(adapter, 0, 0,
3736                                       adapter->if_handle,
3737                                       PORT_FWD_TYPE_PASSTHRU, 0);
3738 done:
3739         kfree(adapter->vf_cfg);
3740         adapter->num_vfs = 0;
3741         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3742 }
3743
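     /* Destroy MCC queues, RX CQs, TX queues and, last, the event queues
      * that the completion queues were attached to.
      */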
3744 static void be_clear_queues(struct be_adapter *adapter)
3745 {
3746         be_mcc_queues_destroy(adapter);
3747         be_rx_cqs_destroy(adapter);
3748         be_tx_queues_destroy(adapter);
3749         be_evt_queues_destroy(adapter);
3750 }
3751
3752 static void be_cancel_worker(struct be_adapter *adapter)
3753 {
3754         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3755                 cancel_delayed_work_sync(&adapter->work);
3756                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3757         }
3758 }
3759
3760 static void be_cancel_err_detection(struct be_adapter *adapter)
3761 {
3762         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3763                 cancel_delayed_work_sync(&adapter->be_err_detection_work);
3764                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3765         }
3766 }
3767
3768 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3769 {
3770         struct net_device *netdev = adapter->netdev;
3771
3772         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3773                 be_cmd_manage_iface(adapter, adapter->if_handle,
3774                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3775
3776         if (adapter->vxlan_port)
3777                 be_cmd_set_vxlan_port(adapter, 0);
3778
3779         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3780         adapter->vxlan_port = 0;
3781
3782         netdev->hw_enc_features = 0;
3783         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3784         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3785 }
3786
3787 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3788                                 struct be_resources *vft_res)
3789 {
3790         struct be_resources res = adapter->pool_res;
3791         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3792         struct be_resources res_mod = {0};
3793         u16 num_vf_qs = 1;
3794
3795         /* Distribute the queue resources among the PF and its VFs */
3796         if (num_vfs) {
3797                 /* Divide the rx queues evenly among the VFs and the PF, capped
3798                  * at VF-EQ-count. Any remainder queues belong to the PF.
3799                  */
3800                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3801                                 res.max_rss_qs / (num_vfs + 1));
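                     /* Illustrative example (numbers assumed): with 64 RSS
                      * queues in the pool and 15 VFs requested, each of the
                      * 16 functions gets 64 / 16 = 4 RSS queues, subject to
                      * the SH_VF_MAX_NIC_EQS cap above.
                      */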
3802
3803                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3804                  * RSS Tables per port. Provide RSS on VFs only if the number
3805                  * of VFs requested is less than its PF Pool's RSS Tables limit.
3806                  */
3807                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3808                         num_vf_qs = 1;
3809         }
3810
3811         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
3812          * which are modifiable using SET_PROFILE_CONFIG cmd.
3813          */
3814         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3815                                   RESOURCE_MODIFIABLE, 0);
3816
3817         /* If RSS IFACE capability flags are modifiable for a VF, set the
3818          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3819          * more than 1 RSSQ is available for a VF.
3820          * Otherwise, provision only 1 queue pair for VF.
3821          */
3822         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3823                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3824                 if (num_vf_qs > 1) {
3825                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3826                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3827                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3828                 } else {
3829                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3830                                              BE_IF_FLAGS_DEFQ_RSS);
3831                 }
3832         } else {
3833                 num_vf_qs = 1;
3834         }
3835
3836         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3837                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3838                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3839         }
3840
3841         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3842         vft_res->max_rx_qs = num_vf_qs;
3843         vft_res->max_rss_qs = num_vf_qs;
3844         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3845         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3846
3847         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3848          * among the PF and its VFs, if the fields are changeable
3849          */
3850         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3851                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3852
3853         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3854                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3855
3856         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3857                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3858
3859         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
3860                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
3861 }
3862
3863 static int be_clear(struct be_adapter *adapter)
3864 {
3865         struct pci_dev *pdev = adapter->pdev;
3866         struct  be_resources vft_res = {0};
3867
3868         be_cancel_worker(adapter);
3869
3870         if (sriov_enabled(adapter))
3871                 be_vf_clear(adapter);
3872
3873         /* Re-configure FW to distribute resources evenly across max-supported
3874          * number of VFs, only when VFs are not already enabled.
3875          */
3876         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
3877             !pci_vfs_assigned(pdev)) {
3878                 be_calculate_vf_res(adapter,
3879                                     pci_sriov_get_totalvfs(pdev),
3880                                     &vft_res);
3881                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
3882                                         pci_sriov_get_totalvfs(pdev),
3883                                         &vft_res);
3884         }
3885
3886         be_disable_vxlan_offloads(adapter);
3887         kfree(adapter->pmac_id);
3888         adapter->pmac_id = NULL;
3889
3890         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
3891
3892         be_clear_queues(adapter);
3893
3894         be_msix_disable(adapter);
3895         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
3896         return 0;
3897 }
3898
3899 static int be_vfs_if_create(struct be_adapter *adapter)
3900 {
3901         struct be_resources res = {0};
3902         u32 cap_flags, en_flags, vf;
3903         struct be_vf_cfg *vf_cfg;
3904         int status;
3905
3906         /* If a FW profile exists, then cap_flags are updated */
3907         cap_flags = BE_VF_IF_EN_FLAGS;
3908
3909         for_all_vfs(adapter, vf_cfg, vf) {
3910                 if (!BE3_chip(adapter)) {
3911                         status = be_cmd_get_profile_config(adapter, &res, NULL,
3912                                                            ACTIVE_PROFILE_TYPE,
3913                                                            RESOURCE_LIMITS,
3914                                                            vf + 1);
3915                         if (!status) {
3916                                 cap_flags = res.if_cap_flags;
3917                                 /* Prevent VFs from enabling VLAN promiscuous
3918                                  * mode
3919                                  */
3920                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3921                         }
3922                 }
3923
3924                 /* PF should enable IF flags during proxy if_create call */
3925                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
3926                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
3927                                           &vf_cfg->if_handle, vf + 1);
3928                 if (status)
3929                         return status;
3930         }
3931
3932         return 0;
3933 }
3934
3935 static int be_vf_setup_init(struct be_adapter *adapter)
3936 {
3937         struct be_vf_cfg *vf_cfg;
3938         int vf;
3939
3940         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
3941                                   GFP_KERNEL);
3942         if (!adapter->vf_cfg)
3943                 return -ENOMEM;
3944
3945         for_all_vfs(adapter, vf_cfg, vf) {
3946                 vf_cfg->if_handle = -1;
3947                 vf_cfg->pmac_id = -1;
3948         }
3949         return 0;
3950 }
3951
3952 static int be_vf_setup(struct be_adapter *adapter)
3953 {
3954         struct device *dev = &adapter->pdev->dev;
3955         struct be_vf_cfg *vf_cfg;
3956         int status, old_vfs, vf;
3957         bool spoofchk;
3958
3959         old_vfs = pci_num_vf(adapter->pdev);
3960
3961         status = be_vf_setup_init(adapter);
3962         if (status)
3963                 goto err;
3964
3965         if (old_vfs) {
3966                 for_all_vfs(adapter, vf_cfg, vf) {
3967                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
3968                         if (status)
3969                                 goto err;
3970                 }
3971
3972                 status = be_vfs_mac_query(adapter);
3973                 if (status)
3974                         goto err;
3975         } else {
3976                 status = be_vfs_if_create(adapter);
3977                 if (status)
3978                         goto err;
3979
3980                 status = be_vf_eth_addr_config(adapter);
3981                 if (status)
3982                         goto err;
3983         }
3984
3985         for_all_vfs(adapter, vf_cfg, vf) {
3986                 /* Allow VFs to program MAC/VLAN filters */
3987                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
3988                                                   vf + 1);
3989                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
3990                         status = be_cmd_set_fn_privileges(adapter,
3991                                                           vf_cfg->privileges |
3992                                                           BE_PRIV_FILTMGMT,
3993                                                           vf + 1);
3994                         if (!status) {
3995                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
3996                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
3997                                          vf);
3998                         }
3999                 }
4000
4001                 /* Allow full available bandwidth */
4002                 if (!old_vfs)
4003                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4004
4005                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4006                                                vf_cfg->if_handle, NULL,
4007                                                &spoofchk);
4008                 if (!status)
4009                         vf_cfg->spoofchk = spoofchk;
4010
4011                 if (!old_vfs) {
4012                         be_cmd_enable_vf(adapter, vf + 1);
4013                         be_cmd_set_logical_link_config(adapter,
4014                                                        IFLA_VF_LINK_STATE_AUTO,
4015                                                        vf+1);
4016                 }
4017         }
4018
4019         if (!old_vfs) {
4020                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4021                 if (status) {
4022                         dev_err(dev, "SRIOV enable failed\n");
4023                         adapter->num_vfs = 0;
4024                         goto err;
4025                 }
4026         }
4027
4028         if (BE3_chip(adapter)) {
4029                 /* On BE3, enable VEB only when SRIOV is enabled */
4030                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4031                                                adapter->if_handle,
4032                                                PORT_FWD_TYPE_VEB, 0);
4033                 if (status)
4034                         goto err;
4035         }
4036
4037         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4038         return 0;
4039 err:
4040         dev_err(dev, "VF setup failed\n");
4041         be_vf_clear(adapter);
4042         return status;
4043 }
4044
4045 /* Converting function_mode bits on BE3 to SH mc_type enums */
4046
4047 static u8 be_convert_mc_type(u32 function_mode)
4048 {
4049         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4050                 return vNIC1;
4051         else if (function_mode & QNQ_MODE)
4052                 return FLEX10;
4053         else if (function_mode & VNIC_MODE)
4054                 return vNIC2;
4055         else if (function_mode & UMC_ENABLED)
4056                 return UMC;
4057         else
4058                 return MC_NONE;
4059 }
4060
4061 /* On BE2/BE3 FW does not suggest the supported limits */
4062 static void BEx_get_resources(struct be_adapter *adapter,
4063                               struct be_resources *res)
4064 {
4065         bool use_sriov = adapter->num_vfs ? 1 : 0;
4066
4067         if (be_physfn(adapter))
4068                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4069         else
4070                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4071
4072         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4073
4074         if (be_is_mc(adapter)) {
4075                 /* Assuming that there are 4 channels per port,
4076                  * when multi-channel is enabled
4077                  */
4078                 if (be_is_qnq_mode(adapter))
4079                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4080                 else
4081                         /* In a non-qnq multichannel mode, the pvid
4082                          * takes up one vlan entry
4083                          */
4084                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4085         } else {
4086                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4087         }
4088
4089         res->max_mcast_mac = BE_MAX_MC;
4090
4091         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4092          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4093          *    *only* if it is RSS-capable.
4094          */
4095         if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4096             be_virtfn(adapter) ||
4097             (be_is_mc(adapter) &&
4098              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4099                 res->max_tx_qs = 1;
4100         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4101                 struct be_resources super_nic_res = {0};
4102
4103                 /* On a SuperNIC profile, the driver needs to use the
4104                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4105                  */
4106                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4107                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4108                                           0);
4109                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4110                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4111         } else {
4112                 res->max_tx_qs = BE3_MAX_TX_QS;
4113         }
4114
4115         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4116             !use_sriov && be_physfn(adapter))
4117                 res->max_rss_qs = (adapter->be3_native) ?
4118                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4119         res->max_rx_qs = res->max_rss_qs + 1;
4120
4121         if (be_physfn(adapter))
4122                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4123                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4124         else
4125                 res->max_evt_qs = 1;
4126
4127         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4128         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4129         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4130                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4131 }
4132
4133 static void be_setup_init(struct be_adapter *adapter)
4134 {
4135         adapter->vlan_prio_bmap = 0xff;
4136         adapter->phy.link_speed = -1;
4137         adapter->if_handle = -1;
4138         adapter->be3_native = false;
4139         adapter->if_flags = 0;
4140         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4141         if (be_physfn(adapter))
4142                 adapter->cmd_privileges = MAX_PRIVILEGES;
4143         else
4144                 adapter->cmd_privileges = MIN_PRIVILEGES;
4145 }
4146
4147 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4148  * However, this HW limitation is not exposed to the host via any SLI cmd.
4149  * As a result, in the case of SRIOV and in particular multi-partition configs
4150  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4151  * for distribution between the VFs. This self-imposed limit will determine the
4152  * number of VFs for which RSS can be enabled.
4153  */
4154 void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4155 {
4156         struct be_port_resources port_res = {0};
4157         u8 rss_tables_on_port;
4158         u16 max_vfs = be_max_vfs(adapter);
4159
4160         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4161                                   RESOURCE_LIMITS, 0);
4162
4163         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4164
4165         /* Each PF Pool's RSS Tables limit =
4166          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4167          */
4168         adapter->pool_res.max_rss_tables =
4169                 max_vfs * rss_tables_on_port / port_res.max_vfs;
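             /* Illustrative example (numbers assumed): if 14 RSS tables remain
              * on the port after the NIC PFs and this PF owns 64 of the port's
              * 128 VFs, its pool limit becomes 64 * 14 / 128 = 7 RSS tables.
              */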
4170 }
4171
4172 static int be_get_sriov_config(struct be_adapter *adapter)
4173 {
4174         struct be_resources res = {0};
4175         int max_vfs, old_vfs;
4176
4177         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4178                                   RESOURCE_LIMITS, 0);
4179
4180         /* Some old versions of BE3 FW don't report max_vfs value */
4181         if (BE3_chip(adapter) && !res.max_vfs) {
4182                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4183                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4184         }
4185
4186         adapter->pool_res = res;
4187
4188         /* If during previous unload of the driver, the VFs were not disabled,
4189          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4190          * Instead use the TotalVFs value stored in the pci-dev struct.
4191          */
4192         old_vfs = pci_num_vf(adapter->pdev);
4193         if (old_vfs) {
4194                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4195                          old_vfs);
4196
4197                 adapter->pool_res.max_vfs =
4198                         pci_sriov_get_totalvfs(adapter->pdev);
4199                 adapter->num_vfs = old_vfs;
4200         }
4201
4202         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4203                 be_calculate_pf_pool_rss_tables(adapter);
4204                 dev_info(&adapter->pdev->dev,
4205                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4206                          be_max_pf_pool_rss_tables(adapter));
4207         }
4208         return 0;
4209 }
4210
4211 static void be_alloc_sriov_res(struct be_adapter *adapter)
4212 {
4213         int old_vfs = pci_num_vf(adapter->pdev);
4214         struct  be_resources vft_res = {0};
4215         int status;
4216
4217         be_get_sriov_config(adapter);
4218
4219         if (!old_vfs)
4220                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4221
4222         /* When the HW is in SRIOV capable configuration, the PF-pool
4223          * resources are given to PF during driver load, if there are no
4224          * old VFs. This facility is not available in BE3 FW.
4225          * Also, this is done by FW in Lancer chip.
4226          */
4227         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4228                 be_calculate_vf_res(adapter, 0, &vft_res);
4229                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4230                                                  &vft_res);
4231                 if (status)
4232                         dev_err(&adapter->pdev->dev,
4233                                 "Failed to optimize SRIOV resources\n");
4234         }
4235 }
4236
4237 static int be_get_resources(struct be_adapter *adapter)
4238 {
4239         struct device *dev = &adapter->pdev->dev;
4240         struct be_resources res = {0};
4241         int status;
4242
4243         /* For Lancer, SH etc. read per-function resource limits from FW.
4244          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4245          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4246          */
4247         if (BEx_chip(adapter)) {
4248                 BEx_get_resources(adapter, &res);
4249         } else {
4250                 status = be_cmd_get_func_config(adapter, &res);
4251                 if (status)
4252                         return status;
4253
4254                 /* If a default RXQ must be created, we'll use up one RSSQ */
4255                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4256                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4257                         res.max_rss_qs -= 1;
4258         }
4259
4260         /* If RoCE is supported stash away half the EQs for RoCE */
4261         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4262                                 res.max_evt_qs / 2 : res.max_evt_qs;
4263         adapter->res = res;
4264
4265         /* If FW supports RSS default queue, then skip creating non-RSS
4266          * queue for non-IP traffic.
4267          */
4268         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4269                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4270
4271         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4272                  be_max_txqs(adapter), be_max_rxqs(adapter),
4273                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4274                  be_max_vfs(adapter));
4275         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4276                  be_max_uc(adapter), be_max_mc(adapter),
4277                  be_max_vlans(adapter));
4278
4279         /* Ensure RX and TX queues are created in pairs at init time */
4280         adapter->cfg_num_rx_irqs =
4281                                 min_t(u16, netif_get_num_default_rss_queues(),
4282                                       be_max_qp_irqs(adapter));
4283         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4284         return 0;
4285 }
4286
4287 static int be_get_config(struct be_adapter *adapter)
4288 {
4289         int status, level;
4290         u16 profile_id;
4291
4292         status = be_cmd_get_cntl_attributes(adapter);
4293         if (status)
4294                 return status;
4295
4296         status = be_cmd_query_fw_cfg(adapter);
4297         if (status)
4298                 return status;
4299
4300         if (!lancer_chip(adapter) && be_physfn(adapter))
4301                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4302
4303         if (BEx_chip(adapter)) {
4304                 level = be_cmd_get_fw_log_level(adapter);
4305                 adapter->msg_enable =
4306                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4307         }
4308
4309         be_cmd_get_acpi_wol_cap(adapter);
4310         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4311         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4312
4313         be_cmd_query_port_name(adapter);
4314
4315         if (be_physfn(adapter)) {
4316                 status = be_cmd_get_active_profile(adapter, &profile_id);
4317                 if (!status)
4318                         dev_info(&adapter->pdev->dev,
4319                                  "Using profile 0x%x\n", profile_id);
4320         }
4321
4322         return 0;
4323 }
4324
4325 static int be_mac_setup(struct be_adapter *adapter)
4326 {
4327         u8 mac[ETH_ALEN];
4328         int status;
4329
4330         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4331                 status = be_cmd_get_perm_mac(adapter, mac);
4332                 if (status)
4333                         return status;
4334
4335                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4336                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4337         }
4338
4339         return 0;
4340 }
4341
4342 static void be_schedule_worker(struct be_adapter *adapter)
4343 {
4344         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
4345         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4346 }
4347
4348 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4349 {
4350         schedule_delayed_work(&adapter->be_err_detection_work,
4351                               msecs_to_jiffies(delay));
4352         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4353 }
4354
4355 static int be_setup_queues(struct be_adapter *adapter)
4356 {
4357         struct net_device *netdev = adapter->netdev;
4358         int status;
4359
4360         status = be_evt_queues_create(adapter);
4361         if (status)
4362                 goto err;
4363
4364         status = be_tx_qs_create(adapter);
4365         if (status)
4366                 goto err;
4367
4368         status = be_rx_cqs_create(adapter);
4369         if (status)
4370                 goto err;
4371
4372         status = be_mcc_queues_create(adapter);
4373         if (status)
4374                 goto err;
4375
4376         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4377         if (status)
4378                 goto err;
4379
4380         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4381         if (status)
4382                 goto err;
4383
4384         return 0;
4385 err:
4386         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4387         return status;
4388 }
4389
4390 static int be_if_create(struct be_adapter *adapter)
4391 {
4392         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4393         u32 cap_flags = be_if_cap_flags(adapter);
4394         int status;
4395
4396         if (adapter->cfg_num_rx_irqs == 1)
4397                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4398
4399         en_flags &= cap_flags;
4400         /* will enable all the needed filter flags in be_open() */
4401         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4402                                   &adapter->if_handle, 0);
4403
4404         return status;
4405 }
4406
4407 int be_update_queues(struct be_adapter *adapter)
4408 {
4409         struct net_device *netdev = adapter->netdev;
4410         int status;
4411
4412         if (netif_running(netdev))
4413                 be_close(netdev);
4414
4415         be_cancel_worker(adapter);
4416
4417         /* If any vectors have been shared with RoCE, we cannot re-program
4418          * the MSIx table.
4419          */
4420         if (!adapter->num_msix_roce_vec)
4421                 be_msix_disable(adapter);
4422
4423         be_clear_queues(adapter);
4424         status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4425         if (status)
4426                 return status;
4427
4428         if (!msix_enabled(adapter)) {
4429                 status = be_msix_enable(adapter);
4430                 if (status)
4431                         return status;
4432         }
4433
4434         status = be_if_create(adapter);
4435         if (status)
4436                 return status;
4437
4438         status = be_setup_queues(adapter);
4439         if (status)
4440                 return status;
4441
4442         be_schedule_worker(adapter);
4443
4444         if (netif_running(netdev))
4445                 status = be_open(netdev);
4446
4447         return status;
4448 }
4449
4450 static inline int fw_major_num(const char *fw_ver)
4451 {
4452         int fw_major = 0, i;
4453
4454         i = sscanf(fw_ver, "%d.", &fw_major);
4455         if (i != 1)
4456                 return 0;
4457
4458         return fw_major;
4459 }
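
     /* For illustration: a fw_ver string such as "4.6.62.0" (hypothetical
      * value) makes fw_major_num() return 4; a string that does not start
      * with a number makes it return 0.
      */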
4460
4461 /* If any VFs are already enabled, don't FLR the PF */
4462 static bool be_reset_required(struct be_adapter *adapter)
4463 {
4464         return pci_num_vf(adapter->pdev) ? false : true;
4465 }
4466
4467 /* Wait for the FW to be ready and perform the required initialization */
4468 static int be_func_init(struct be_adapter *adapter)
4469 {
4470         int status;
4471
4472         status = be_fw_wait_ready(adapter);
4473         if (status)
4474                 return status;
4475
4476         if (be_reset_required(adapter)) {
4477                 status = be_cmd_reset_function(adapter);
4478                 if (status)
4479                         return status;
4480
4481                 /* Wait for interrupts to quiesce after an FLR */
4482                 msleep(100);
4483
4484                 /* We can clear all errors when function reset succeeds */
4485                 be_clear_error(adapter, BE_CLEAR_ALL);
4486         }
4487
4488         /* Tell FW we're ready to fire cmds */
4489         status = be_cmd_fw_init(adapter);
4490         if (status)
4491                 return status;
4492
4493         /* Allow interrupts for other ULPs running on NIC function */
4494         be_intr_set(adapter, true);
4495
4496         return 0;
4497 }
4498
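     /* Bring-up sequence implemented below: wait for FW readiness (and FLR
      * the function if no VFs are enabled), query configuration and resource
      * limits, enable MSI-X, create the interface and queue sets, program
      * the MAC address and flow control, set up any requested VFs, and
      * finally schedule the periodic worker.
      */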
4499 static int be_setup(struct be_adapter *adapter)
4500 {
4501         struct device *dev = &adapter->pdev->dev;
4502         int status;
4503
4504         status = be_func_init(adapter);
4505         if (status)
4506                 return status;
4507
4508         be_setup_init(adapter);
4509
4510         if (!lancer_chip(adapter))
4511                 be_cmd_req_native_mode(adapter);
4512
4513         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4514          * for issuing profile-related cmds
4515          */
4516         if (!BEx_chip(adapter)) {
4517                 status = be_cmd_get_func_config(adapter, NULL);
4518                 if (status)
4519                         return status;
4520         }
4521
4522         status = be_get_config(adapter);
4523         if (status)
4524                 goto err;
4525
4526         if (!BE2_chip(adapter) && be_physfn(adapter))
4527                 be_alloc_sriov_res(adapter);
4528
4529         status = be_get_resources(adapter);
4530         if (status)
4531                 goto err;
4532
4533         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4534                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4535         if (!adapter->pmac_id)
4536                 return -ENOMEM;
4537
4538         status = be_msix_enable(adapter);
4539         if (status)
4540                 goto err;
4541
4542         /* will enable all the needed filter flags in be_open() */
4543         status = be_if_create(adapter);
4544         if (status)
4545                 goto err;
4546
4547         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4548         rtnl_lock();
4549         status = be_setup_queues(adapter);
4550         rtnl_unlock();
4551         if (status)
4552                 goto err;
4553
4554         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4555
4556         status = be_mac_setup(adapter);
4557         if (status)
4558                 goto err;
4559
4560         be_cmd_get_fw_ver(adapter);
4561         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4562
4563         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4564                 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4565                         adapter->fw_ver);
4566                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4567         }
4568
4569         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4570                                          adapter->rx_fc);
4571         if (status)
4572                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4573                                         &adapter->rx_fc);
4574
4575         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4576                  adapter->tx_fc, adapter->rx_fc);
4577
4578         if (be_physfn(adapter))
4579                 be_cmd_set_logical_link_config(adapter,
4580                                                IFLA_VF_LINK_STATE_AUTO, 0);
4581
4582         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4583          * confusing a Linux bridge or OVS that it might be connected to.
4584          * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4585          * when SRIOV is not enabled.
4586          */
4587         if (BE3_chip(adapter))
4588                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4589                                       PORT_FWD_TYPE_PASSTHRU, 0);
4590
4591         if (adapter->num_vfs)
4592                 be_vf_setup(adapter);
4593
4594         status = be_cmd_get_phy_info(adapter);
4595         if (!status && be_pause_supported(adapter))
4596                 adapter->phy.fc_autoneg = 1;
4597
4598         be_schedule_worker(adapter);
4599         adapter->flags |= BE_FLAGS_SETUP_DONE;
4600         return 0;
4601 err:
4602         be_clear(adapter);
4603         return status;
4604 }
4605
4606 #ifdef CONFIG_NET_POLL_CONTROLLER
4607 static void be_netpoll(struct net_device *netdev)
4608 {
4609         struct be_adapter *adapter = netdev_priv(netdev);
4610         struct be_eq_obj *eqo;
4611         int i;
4612
4613         for_all_evt_queues(adapter, eqo, i) {
4614                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4615                 napi_schedule(&eqo->napi);
4616         }
4617 }
4618 #endif
4619
4620 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4621 {
4622         const struct firmware *fw;
4623         int status;
4624
4625         if (!netif_running(adapter->netdev)) {
4626                 dev_err(&adapter->pdev->dev,
4627                         "Firmware load not allowed (interface is down)\n");
4628                 return -ENETDOWN;
4629         }
4630
4631         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4632         if (status)
4633                 goto fw_exit;
4634
4635         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4636
4637         if (lancer_chip(adapter))
4638                 status = lancer_fw_download(adapter, fw);
4639         else
4640                 status = be_fw_download(adapter, fw);
4641
4642         if (!status)
4643                 be_cmd_get_fw_ver(adapter);
4644
4645 fw_exit:
4646         release_firmware(fw);
4647         return status;
4648 }
4649
4650 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4651                                  u16 flags)
4652 {
4653         struct be_adapter *adapter = netdev_priv(dev);
4654         struct nlattr *attr, *br_spec;
4655         int rem;
4656         int status = 0;
4657         u16 mode = 0;
4658
4659         if (!sriov_enabled(adapter))
4660                 return -EOPNOTSUPP;
4661
4662         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4663         if (!br_spec)
4664                 return -EINVAL;
4665
4666         nla_for_each_nested(attr, br_spec, rem) {
4667                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4668                         continue;
4669
4670                 if (nla_len(attr) < sizeof(mode))
4671                         return -EINVAL;
4672
4673                 mode = nla_get_u16(attr);
4674                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4675                         return -EOPNOTSUPP;
4676
4677                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4678                         return -EINVAL;
4679
4680                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4681                                                adapter->if_handle,
4682                                                mode == BRIDGE_MODE_VEPA ?
4683                                                PORT_FWD_TYPE_VEPA :
4684                                                PORT_FWD_TYPE_VEB, 0);
4685                 if (status)
4686                         goto err;
4687
4688                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4689                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4690
4691                 return status;
4692         }
4693 err:
4694         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4695                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4696
4697         return status;
4698 }
4699
4700 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4701                                  struct net_device *dev, u32 filter_mask,
4702                                  int nlflags)
4703 {
4704         struct be_adapter *adapter = netdev_priv(dev);
4705         int status = 0;
4706         u8 hsw_mode;
4707
4708         /* BE and Lancer chips support VEB mode only */
4709         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4710                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4711                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4712                         return 0;
4713                 hsw_mode = PORT_FWD_TYPE_VEB;
4714         } else {
4715                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4716                                                adapter->if_handle, &hsw_mode,
4717                                                NULL);
4718                 if (status)
4719                         return 0;
4720
4721                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4722                         return 0;
4723         }
4724
4725         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4726                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4727                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4728                                        0, 0, nlflags, filter_mask, NULL);
4729 }
4730
4731 /* VxLAN offload Notes:
4732  *
4733  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4734  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4735  * is expected to work across all types of IP tunnels once exported. Skyhawk
4736  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4737  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4738  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4739  * those other tunnels are unexported on the fly through ndo_features_check().
4740  *
4741  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4742  * adds more than one port, disable offloads and don't re-enable them again
4743  * until after all the tunnels are removed.
4744  */
4745 static void be_add_vxlan_port(struct net_device *netdev,
4746                               struct udp_tunnel_info *ti)
4747 {
4748         struct be_adapter *adapter = netdev_priv(netdev);
4749         struct device *dev = &adapter->pdev->dev;
4750         __be16 port = ti->port;
4751         int status;
4752
4753         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4754                 return;
4755
4756         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4757                 return;
4758
4759         if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
4760                 adapter->vxlan_port_aliases++;
4761                 return;
4762         }
4763
4764         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4765                 dev_info(dev,
4766                          "Only one UDP port supported for VxLAN offloads\n");
4767                 dev_info(dev, "Disabling VxLAN offloads\n");
4768                 adapter->vxlan_port_count++;
4769                 goto err;
4770         }
4771
4772         if (adapter->vxlan_port_count++ >= 1)
4773                 return;
4774
4775         status = be_cmd_manage_iface(adapter, adapter->if_handle,
4776                                      OP_CONVERT_NORMAL_TO_TUNNEL);
4777         if (status) {
4778                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
4779                 goto err;
4780         }
4781
4782         status = be_cmd_set_vxlan_port(adapter, port);
4783         if (status) {
4784                 dev_warn(dev, "Failed to add VxLAN port\n");
4785                 goto err;
4786         }
4787         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
4788         adapter->vxlan_port = port;
4789
4790         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4791                                    NETIF_F_TSO | NETIF_F_TSO6 |
4792                                    NETIF_F_GSO_UDP_TUNNEL;
4793         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
4794         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
4795
4796         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4797                  be16_to_cpu(port));
4798         return;
4799 err:
4800         be_disable_vxlan_offloads(adapter);
4801 }
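
     /* Illustration of the bookkeeping above (hypothetical sequence): adding
      * UDP port 4789 enables the offloads; adding a second, different port
      * disables them, and they are re-enabled only after every port has been
      * removed and a port is added again; re-adding the already offloaded
      * port just bumps vxlan_port_aliases.
      */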
4802
4803 static void be_del_vxlan_port(struct net_device *netdev,
4804                               struct udp_tunnel_info *ti)
4805 {
4806         struct be_adapter *adapter = netdev_priv(netdev);
4807         __be16 port = ti->port;
4808
4809         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
4810                 return;
4811
4812         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
4813                 return;
4814
4815         if (adapter->vxlan_port != port)
4816                 goto done;
4817
4818         if (adapter->vxlan_port_aliases) {
4819                 adapter->vxlan_port_aliases--;
4820                 return;
4821         }
4822
4823         be_disable_vxlan_offloads(adapter);
4824
4825         dev_info(&adapter->pdev->dev,
4826                  "Disabled VxLAN offloads for UDP port %d\n",
4827                  be16_to_cpu(port));
4828 done:
4829         adapter->vxlan_port_count--;
4830 }
4831
4832 static netdev_features_t be_features_check(struct sk_buff *skb,
4833                                            struct net_device *dev,
4834                                            netdev_features_t features)
4835 {
4836         struct be_adapter *adapter = netdev_priv(dev);
4837         u8 l4_hdr = 0;
4838
4839         /* The code below restricts offload features for some tunneled packets.
4840          * Offload features for normal (non tunnel) packets are unchanged.
4841          */
4842         if (!skb->encapsulation ||
4843             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
4844                 return features;
4845
4846         /* It's an encapsulated packet and VxLAN offloads are enabled. We
4847          * should disable tunnel offload features if it's not a VxLAN packet,
4848          * as tunnel offloads have been enabled only for VxLAN. This is done to
4849          * allow other tunneled traffic like GRE work fine while VxLAN
4850          * offloads are configured in Skyhawk-R.
4851          */
4852         switch (vlan_get_protocol(skb)) {
4853         case htons(ETH_P_IP):
4854                 l4_hdr = ip_hdr(skb)->protocol;
4855                 break;
4856         case htons(ETH_P_IPV6):
4857                 l4_hdr = ipv6_hdr(skb)->nexthdr;
4858                 break;
4859         default:
4860                 return features;
4861         }
4862
4863         if (l4_hdr != IPPROTO_UDP ||
4864             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
4865             skb->inner_protocol != htons(ETH_P_TEB) ||
4866             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
4867             sizeof(struct udphdr) + sizeof(struct vxlanhdr))
4868                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4869
4870         return features;
4871 }
4872
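     /* Builds a physical port ID laid out as: byte 0 = HBA port number + 1,
      * followed by the controller serial-number words copied in reverse
      * word order.
      */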
4873 static int be_get_phys_port_id(struct net_device *dev,
4874                                struct netdev_phys_item_id *ppid)
4875 {
4876         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
4877         struct be_adapter *adapter = netdev_priv(dev);
4878         u8 *id;
4879
4880         if (MAX_PHYS_ITEM_ID_LEN < id_len)
4881                 return -ENOSPC;
4882
4883         ppid->id[0] = adapter->hba_port_num + 1;
4884         id = &ppid->id[1];
4885         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
4886              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
4887                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
4888
4889         ppid->id_len = id_len;
4890
4891         return 0;
4892 }
4893
4894 static const struct net_device_ops be_netdev_ops = {
4895         .ndo_open               = be_open,
4896         .ndo_stop               = be_close,
4897         .ndo_start_xmit         = be_xmit,
4898         .ndo_set_rx_mode        = be_set_rx_mode,
4899         .ndo_set_mac_address    = be_mac_addr_set,
4900         .ndo_change_mtu         = be_change_mtu,
4901         .ndo_get_stats64        = be_get_stats64,
4902         .ndo_validate_addr      = eth_validate_addr,
4903         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
4904         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
4905         .ndo_set_vf_mac         = be_set_vf_mac,
4906         .ndo_set_vf_vlan        = be_set_vf_vlan,
4907         .ndo_set_vf_rate        = be_set_vf_tx_rate,
4908         .ndo_get_vf_config      = be_get_vf_config,
4909         .ndo_set_vf_link_state  = be_set_vf_link_state,
4910         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
4911 #ifdef CONFIG_NET_POLL_CONTROLLER
4912         .ndo_poll_controller    = be_netpoll,
4913 #endif
4914         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
4915         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
4916 #ifdef CONFIG_NET_RX_BUSY_POLL
4917         .ndo_busy_poll          = be_busy_poll,
4918 #endif
4919         .ndo_udp_tunnel_add     = be_add_vxlan_port,
4920         .ndo_udp_tunnel_del     = be_del_vxlan_port,
4921         .ndo_features_check     = be_features_check,
4922         .ndo_get_phys_port_id   = be_get_phys_port_id,
4923 };
4924
4925 static void be_netdev_init(struct net_device *netdev)
4926 {
4927         struct be_adapter *adapter = netdev_priv(netdev);
4928
4929         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4930                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
4931                 NETIF_F_HW_VLAN_CTAG_TX;
4932         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
4933                 netdev->hw_features |= NETIF_F_RXHASH;
4934
4935         netdev->features |= netdev->hw_features |
4936                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
4937
4938         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
4939                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4940
4941         netdev->priv_flags |= IFF_UNICAST_FLT;
4942
4943         netdev->flags |= IFF_MULTICAST;
4944
4945         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
4946
4947         netdev->netdev_ops = &be_netdev_ops;
4948
4949         netdev->ethtool_ops = &be_ethtool_ops;
4950 }
4951
4952 static void be_cleanup(struct be_adapter *adapter)
4953 {
4954         struct net_device *netdev = adapter->netdev;
4955
4956         rtnl_lock();
4957         netif_device_detach(netdev);
4958         if (netif_running(netdev))
4959                 be_close(netdev);
4960         rtnl_unlock();
4961
4962         be_clear(adapter);
4963 }
4964
4965 static int be_resume(struct be_adapter *adapter)
4966 {
4967         struct net_device *netdev = adapter->netdev;
4968         int status;
4969
4970         status = be_setup(adapter);
4971         if (status)
4972                 return status;
4973
4974         rtnl_lock();
4975         if (netif_running(netdev))
4976                 status = be_open(netdev);
4977         rtnl_unlock();
4978
4979         if (status)
4980                 return status;
4981
4982         netif_device_attach(netdev);
4983
4984         return 0;
4985 }
4986
4987 static int be_err_recover(struct be_adapter *adapter)
4988 {
4989         int status;
4990
4991         /* Error recovery is supported only on Lancer as of now */
4992         if (!lancer_chip(adapter))
4993                 return -EIO;
4994
4995         /* Wait for adapter to reach quiescent state before
4996          * destroying queues
4997          */
4998         status = be_fw_wait_ready(adapter);
4999         if (status)
5000                 goto err;
5001
5002         be_cleanup(adapter);
5003
5004         status = be_resume(adapter);
5005         if (status)
5006                 goto err;
5007
5008         return 0;
5009 err:
5010         return status;
5011 }
5012
5013 static void be_err_detection_task(struct work_struct *work)
5014 {
5015         struct be_adapter *adapter =
5016                                 container_of(work, struct be_adapter,
5017                                              be_err_detection_work.work);
5018         struct device *dev = &adapter->pdev->dev;
5019         int recovery_status;
5020         int delay = ERR_DETECTION_DELAY;
5021
5022         be_detect_error(adapter);
5023
5024         if (be_check_error(adapter, BE_ERROR_HW))
5025                 recovery_status = be_err_recover(adapter);
5026         else
5027                 goto reschedule_task;
5028
5029         if (!recovery_status) {
5030                 adapter->recovery_retries = 0;
5031                 dev_info(dev, "Adapter recovery successful\n");
5032                 goto reschedule_task;
5033         } else if (be_virtfn(adapter)) {
5034                 /* For VFs, check every second whether the PF has
5035                  * allocated resources.
5036                  */
5037                 dev_err(dev, "Re-trying adapter recovery\n");
5038                 goto reschedule_task;
5039         } else if (adapter->recovery_retries++ <
5040                    MAX_ERR_RECOVERY_RETRY_COUNT) {
5041                 /* In case of another error during recovery, it takes 30 sec
5042                  * for the adapter to come out of error. Retry error recovery after
5043                  * this time interval.
5044                  */
5045                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5046                 delay = ERR_RECOVERY_RETRY_DELAY;
5047                 goto reschedule_task;
5048         } else {
5049                 dev_err(dev, "Adapter recovery failed\n");
5050         }
5051
5052         return;
5053 reschedule_task:
5054         be_schedule_err_detection(adapter, delay);
5055 }
5056
5057 static void be_log_sfp_info(struct be_adapter *adapter)
5058 {
5059         int status;
5060
5061         status = be_cmd_query_sfp_info(adapter);
5062         if (!status) {
5063                 dev_err(&adapter->pdev->dev,
5064                         "Port %c: %s Vendor: %s part no: %s",
5065                         adapter->port_name,
5066                         be_misconfig_evt_port_state[adapter->phy_state],
5067                         adapter->phy.vendor_name,
5068                         adapter->phy.vendor_pn);
5069         }
5070         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5071 }
5072
5073 static void be_worker(struct work_struct *work)
5074 {
5075         struct be_adapter *adapter =
5076                 container_of(work, struct be_adapter, work.work);
5077         struct be_rx_obj *rxo;
5078         int i;
5079
5080         if (be_physfn(adapter) &&
5081             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5082                 be_cmd_get_die_temperature(adapter);
5083
5084         /* when interrupts are not yet enabled, just reap any pending
5085          * mcc completions
5086          */
5087         if (!netif_running(adapter->netdev)) {
5088                 local_bh_disable();
5089                 be_process_mcc(adapter);
5090                 local_bh_enable();
5091                 goto reschedule;
5092         }
5093
5094         if (!adapter->stats_cmd_sent) {
5095                 if (lancer_chip(adapter))
5096                         lancer_cmd_get_pport_stats(adapter,
5097                                                    &adapter->stats_cmd);
5098                 else
5099                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5100         }
5101
5102         for_all_rx_queues(adapter, rxo, i) {
5103                 /* Replenish RX-queues starved due to memory
5104                  * allocation failures.
5105                  */
5106                 if (rxo->rx_post_starved)
5107                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5108         }
5109
5110         /* EQ-delay update for Skyhawk is done while notifying EQ */
5111         if (!skyhawk_chip(adapter))
5112                 be_eqd_update(adapter, false);
5113
5114         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5115                 be_log_sfp_info(adapter);
5116
5117 reschedule:
5118         adapter->work_counter++;
5119         schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
5120 }
5121
5122 static void be_unmap_pci_bars(struct be_adapter *adapter)
5123 {
5124         if (adapter->csr)
5125                 pci_iounmap(adapter->pdev, adapter->csr);
5126         if (adapter->db)
5127                 pci_iounmap(adapter->pdev, adapter->db);
5128         if (adapter->pcicfg && adapter->pcicfg_mapped)
5129                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5130 }
5131
5132 static int db_bar(struct be_adapter *adapter)
5133 {
5134         if (lancer_chip(adapter) || be_virtfn(adapter))
5135                 return 0;
5136         else
5137                 return 4;
5138 }
5139
5140 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5141 {
5142         if (skyhawk_chip(adapter)) {
5143                 adapter->roce_db.size = 4096;
5144                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5145                                                               db_bar(adapter));
5146                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5147                                                                db_bar(adapter));
5148         }
5149         return 0;
5150 }
5151
5152 static int be_map_pci_bars(struct be_adapter *adapter)
5153 {
5154         struct pci_dev *pdev = adapter->pdev;
5155         u8 __iomem *addr;
5156         u32 sli_intf;
5157
5158         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5159         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5160                                 SLI_INTF_FAMILY_SHIFT;
5161         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5162
5163         if (BEx_chip(adapter) && be_physfn(adapter)) {
5164                 adapter->csr = pci_iomap(pdev, 2, 0);
5165                 if (!adapter->csr)
5166                         return -ENOMEM;
5167         }
5168
5169         addr = pci_iomap(pdev, db_bar(adapter), 0);
5170         if (!addr)
5171                 goto pci_map_err;
5172         adapter->db = addr;
5173
5174         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5175                 if (be_physfn(adapter)) {
5176                         /* PCICFG is the 2nd BAR in BE2 */
5177                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5178                         if (!addr)
5179                                 goto pci_map_err;
5180                         adapter->pcicfg = addr;
5181                         adapter->pcicfg_mapped = true;
5182                 } else {
5183                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5184                         adapter->pcicfg_mapped = false;
5185                 }
5186         }
5187
5188         be_roce_map_pci_bars(adapter);
5189         return 0;
5190
5191 pci_map_err:
5192         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5193         be_unmap_pci_bars(adapter);
5194         return -ENOMEM;
5195 }
5196
5197 static void be_drv_cleanup(struct be_adapter *adapter)
5198 {
5199         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5200         struct device *dev = &adapter->pdev->dev;
5201
5202         if (mem->va)
5203                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5204
5205         mem = &adapter->rx_filter;
5206         if (mem->va)
5207                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5208
5209         mem = &adapter->stats_cmd;
5210         if (mem->va)
5211                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5212 }
5213
5214 /* Allocate and initialize various fields in be_adapter struct */
5215 static int be_drv_init(struct be_adapter *adapter)
5216 {
5217         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5218         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5219         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5220         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5221         struct device *dev = &adapter->pdev->dev;
5222         int status = 0;
5223
5224         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5225         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5226                                                  &mbox_mem_alloc->dma,
5227                                                  GFP_KERNEL);
5228         if (!mbox_mem_alloc->va)
5229                 return -ENOMEM;
5230
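             /* The mailbox is used at a 16-byte-aligned address; the
              * allocation above is padded by 16 bytes so that both the CPU
              * and DMA addresses can be aligned here.
              */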
5231         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5232         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5233         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5234
5235         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5236         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5237                                             &rx_filter->dma, GFP_KERNEL);
5238         if (!rx_filter->va) {
5239                 status = -ENOMEM;
5240                 goto free_mbox;
5241         }
5242
5243         if (lancer_chip(adapter))
5244                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5245         else if (BE2_chip(adapter))
5246                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5247         else if (BE3_chip(adapter))
5248                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5249         else
5250                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5251         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5252                                             &stats_cmd->dma, GFP_KERNEL);
5253         if (!stats_cmd->va) {
5254                 status = -ENOMEM;
5255                 goto free_rx_filter;
5256         }
5257
5258         mutex_init(&adapter->mbox_lock);
5259         spin_lock_init(&adapter->mcc_lock);
5260         spin_lock_init(&adapter->mcc_cq_lock);
5261         init_completion(&adapter->et_cmd_compl);
5262
5263         pci_save_state(adapter->pdev);
5264
5265         INIT_DELAYED_WORK(&adapter->work, be_worker);
5266         INIT_DELAYED_WORK(&adapter->be_err_detection_work,
5267                           be_err_detection_task);
5268
5269         adapter->rx_fc = true;
5270         adapter->tx_fc = true;
5271
5272         /* Must be a power of 2 or else MODULO will BUG_ON */
5273         adapter->be_get_temp_freq = 64;
5274
5275         return 0;
5276
5277 free_rx_filter:
5278         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5279 free_mbox:
5280         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5281                           mbox_mem_alloc->dma);
5282         return status;
5283 }
5284
5285 static void be_remove(struct pci_dev *pdev)
5286 {
5287         struct be_adapter *adapter = pci_get_drvdata(pdev);
5288
5289         if (!adapter)
5290                 return;
5291
5292         be_roce_dev_remove(adapter);
5293         be_intr_set(adapter, false);
5294
5295         be_cancel_err_detection(adapter);
5296
5297         unregister_netdev(adapter->netdev);
5298
5299         be_clear(adapter);
5300
5301         /* tell fw we're done with firing cmds */
5302         be_cmd_fw_clean(adapter);
5303
5304         be_unmap_pci_bars(adapter);
5305         be_drv_cleanup(adapter);
5306
5307         pci_disable_pcie_error_reporting(pdev);
5308
5309         pci_release_regions(pdev);
5310         pci_disable_device(pdev);
5311
5312         free_netdev(adapter->netdev);
5313 }
5314
5315 static ssize_t be_hwmon_show_temp(struct device *dev,
5316                                   struct device_attribute *dev_attr,
5317                                   char *buf)
5318 {
5319         struct be_adapter *adapter = dev_get_drvdata(dev);
5320
5321         /* Unit: millidegree Celsius */
5322         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5323                 return -EIO;
5324         else
5325                 return sprintf(buf, "%u\n",
5326                                adapter->hwmon_info.be_on_die_temp * 1000);
5327 }
5328
5329 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5330                           be_hwmon_show_temp, NULL, 1);
5331
5332 static struct attribute *be_hwmon_attrs[] = {
5333         &sensor_dev_attr_temp1_input.dev_attr.attr,
5334         NULL
5335 };
5336
5337 ATTRIBUTE_GROUPS(be_hwmon);
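
     /* The sensor registered in be_probe() appears under the standard hwmon
      * sysfs tree; for example (hwmon index is illustrative):
      *   cat /sys/class/hwmon/hwmon0/temp1_input   -> die temp in millidegree C
      */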
5338
5339 static char *mc_name(struct be_adapter *adapter)
5340 {
5341         char *str = ""; /* default */
5342
5343         switch (adapter->mc_type) {
5344         case UMC:
5345                 str = "UMC";
5346                 break;
5347         case FLEX10:
5348                 str = "FLEX10";
5349                 break;
5350         case vNIC1:
5351                 str = "vNIC-1";
5352                 break;
5353         case nPAR:
5354                 str = "nPAR";
5355                 break;
5356         case UFP:
5357                 str = "UFP";
5358                 break;
5359         case vNIC2:
5360                 str = "vNIC-2";
5361                 break;
5362         default:
5363                 str = "";
5364         }
5365
5366         return str;
5367 }
5368
5369 static inline char *func_name(struct be_adapter *adapter)
5370 {
5371         return be_physfn(adapter) ? "PF" : "VF";
5372 }
5373
5374 static inline char *nic_name(struct pci_dev *pdev)
5375 {
5376         switch (pdev->device) {
5377         case OC_DEVICE_ID1:
5378                 return OC_NAME;
5379         case OC_DEVICE_ID2:
5380                 return OC_NAME_BE;
5381         case OC_DEVICE_ID3:
5382         case OC_DEVICE_ID4:
5383                 return OC_NAME_LANCER;
5384         case BE_DEVICE_ID2:
5385                 return BE3_NAME;
5386         case OC_DEVICE_ID5:
5387         case OC_DEVICE_ID6:
5388                 return OC_NAME_SH;
5389         default:
5390                 return BE_NAME;
5391         }
5392 }
5393
5394 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5395 {
5396         struct be_adapter *adapter;
5397         struct net_device *netdev;
5398         int status = 0;
5399
5400         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5401
5402         status = pci_enable_device(pdev);
5403         if (status)
5404                 goto do_none;
5405
5406         status = pci_request_regions(pdev, DRV_NAME);
5407         if (status)
5408                 goto disable_dev;
5409         pci_set_master(pdev);
5410
5411         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5412         if (!netdev) {
5413                 status = -ENOMEM;
5414                 goto rel_reg;
5415         }
5416         adapter = netdev_priv(netdev);
5417         adapter->pdev = pdev;
5418         pci_set_drvdata(pdev, adapter);
5419         adapter->netdev = netdev;
5420         SET_NETDEV_DEV(netdev, &pdev->dev);
5421
5422         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5423         if (!status) {
5424                 netdev->features |= NETIF_F_HIGHDMA;
5425         } else {
5426                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5427                 if (status) {
5428                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5429                         goto free_netdev;
5430                 }
5431         }
5432
5433         status = pci_enable_pcie_error_reporting(pdev);
5434         if (!status)
5435                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5436
5437         status = be_map_pci_bars(adapter);
5438         if (status)
5439                 goto free_netdev;
5440
5441         status = be_drv_init(adapter);
5442         if (status)
5443                 goto unmap_bars;
5444
5445         status = be_setup(adapter);
5446         if (status)
5447                 goto drv_cleanup;
5448
5449         be_netdev_init(netdev);
5450         status = register_netdev(netdev);
5451         if (status != 0)
5452                 goto unsetup;
5453
5454         be_roce_dev_add(adapter);
5455
5456         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5457
5458         /* On-die temperature is not supported for VFs. */
5459         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5460                 adapter->hwmon_info.hwmon_dev =
5461                         devm_hwmon_device_register_with_groups(&pdev->dev,
5462                                                                DRV_NAME,
5463                                                                adapter,
5464                                                                be_hwmon_groups);
5465                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5466         }
5467
5468         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5469                  func_name(adapter), mc_name(adapter), adapter->port_name);
5470
5471         return 0;
5472
5473 unsetup:
5474         be_clear(adapter);
5475 drv_cleanup:
5476         be_drv_cleanup(adapter);
5477 unmap_bars:
5478         be_unmap_pci_bars(adapter);
5479 free_netdev:
5480         free_netdev(netdev);
5481 rel_reg:
5482         pci_release_regions(pdev);
5483 disable_dev:
5484         pci_disable_device(pdev);
5485 do_none:
5486         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5487         return status;
5488 }
5489
5490 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5491 {
5492         struct be_adapter *adapter = pci_get_drvdata(pdev);
5493
5494         be_intr_set(adapter, false);
5495         be_cancel_err_detection(adapter);
5496
5497         be_cleanup(adapter);
5498
5499         pci_save_state(pdev);
5500         pci_disable_device(pdev);
5501         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5502         return 0;
5503 }
5504
5505 static int be_pci_resume(struct pci_dev *pdev)
5506 {
5507         struct be_adapter *adapter = pci_get_drvdata(pdev);
5508         int status = 0;
5509
5510         status = pci_enable_device(pdev);
5511         if (status)
5512                 return status;
5513
5514         pci_restore_state(pdev);
5515
5516         status = be_resume(adapter);
5517         if (status)
5518                 return status;
5519
5520         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5521
5522         return 0;
5523 }
5524
5525 /*
5526  * An FLR will stop BE from DMAing any data.
5527  */
5528 static void be_shutdown(struct pci_dev *pdev)
5529 {
5530         struct be_adapter *adapter = pci_get_drvdata(pdev);
5531
5532         if (!adapter)
5533                 return;
5534
5535         be_roce_dev_shutdown(adapter);
5536         cancel_delayed_work_sync(&adapter->work);
5537         be_cancel_err_detection(adapter);
5538
5539         netif_device_detach(adapter->netdev);
5540
5541         be_cmd_reset_function(adapter);
5542
5543         pci_disable_device(pdev);
5544 }
5545
5546 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5547                                             pci_channel_state_t state)
5548 {
5549         struct be_adapter *adapter = pci_get_drvdata(pdev);
5550
5551         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5552
5553         be_roce_dev_remove(adapter);
5554
5555         if (!be_check_error(adapter, BE_ERROR_EEH)) {
5556                 be_set_error(adapter, BE_ERROR_EEH);
5557
5558                 be_cancel_err_detection(adapter);
5559
5560                 be_cleanup(adapter);
5561         }
5562
5563         if (state == pci_channel_io_perm_failure)
5564                 return PCI_ERS_RESULT_DISCONNECT;
5565
5566         pci_disable_device(pdev);
5567
5568         /* The error could cause the FW to trigger a flash debug dump.
5569          * Resetting the card while flash dump is in progress
5570          * can cause it not to recover; wait for it to finish.
5571          * Wait only for first function as it is needed only once per
5572          * adapter.
5573          */
5574         if (pdev->devfn == 0)
5575                 ssleep(30);
5576
5577         return PCI_ERS_RESULT_NEED_RESET;
5578 }
5579
5580 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
5581 {
5582         struct be_adapter *adapter = pci_get_drvdata(pdev);
5583         int status;
5584
5585         dev_info(&adapter->pdev->dev, "EEH reset\n");
5586
5587         status = pci_enable_device(pdev);
5588         if (status)
5589                 return PCI_ERS_RESULT_DISCONNECT;
5590
5591         pci_set_master(pdev);
5592         pci_restore_state(pdev);
5593
5594         /* Check if card is ok and fw is ready */
5595         dev_info(&adapter->pdev->dev,
5596                  "Waiting for FW to be ready after EEH reset\n");
5597         status = be_fw_wait_ready(adapter);
5598         if (status)
5599                 return PCI_ERS_RESULT_DISCONNECT;
5600
5601         pci_cleanup_aer_uncorrect_error_status(pdev);
5602         be_clear_error(adapter, BE_CLEAR_ALL);
5603         return PCI_ERS_RESULT_RECOVERED;
5604 }
5605
5606 static void be_eeh_resume(struct pci_dev *pdev)
5607 {
5608         int status = 0;
5609         struct be_adapter *adapter = pci_get_drvdata(pdev);
5610
5611         dev_info(&adapter->pdev->dev, "EEH resume\n");
5612
5613         pci_save_state(pdev);
5614
5615         status = be_resume(adapter);
5616         if (status)
5617                 goto err;
5618
5619         be_roce_dev_add(adapter);
5620
5621         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5622         return;
5623 err:
5624         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
5625 }
5626
5627 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
5628 {
5629         struct be_adapter *adapter = pci_get_drvdata(pdev);
5630         struct be_resources vft_res = {0};
5631         int status;
5632
5633         if (!num_vfs)
5634                 be_vf_clear(adapter);
5635
5636         adapter->num_vfs = num_vfs;
5637
5638         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
5639                 dev_warn(&pdev->dev,
5640                          "Cannot disable VFs while they are assigned\n");
5641                 return -EBUSY;
5642         }
5643
5644         /* When the HW is in SRIOV capable configuration, the PF-pool resources
5645          * are equally distributed across the max-number of VFs. The user may
5646          * request only a subset of the max-vfs to be enabled.
5647          * Based on num_vfs, redistribute the resources across num_vfs so that
5648          * each VF has access to a larger share of the resources.
5649          * This facility is not available in BE3 FW.
5650          * Also, this is done by FW in Lancer chip.
5651          */
5652         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
5653                 be_calculate_vf_res(adapter, adapter->num_vfs,
5654                                     &vft_res);
5655                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
5656                                                  adapter->num_vfs, &vft_res);
5657                 if (status)
5658                         dev_err(&pdev->dev,
5659                                 "Failed to optimize SR-IOV resources\n");
5660         }
5661
5662         status = be_get_resources(adapter);
5663         if (status)
5664                 return be_cmd_status(status);
5665
5666         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
5667         rtnl_lock();
5668         status = be_update_queues(adapter);
5669         rtnl_unlock();
5670         if (status)
5671                 return be_cmd_status(status);
5672
5673         if (adapter->num_vfs)
5674                 status = be_vf_setup(adapter);
5675
5676         if (!status)
5677                 return adapter->num_vfs;
5678
5679         return 0;
5680 }
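
     /* This callback is invoked through the standard PCI sysfs interface;
      * for example (device address is illustrative):
      *   echo 4 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs   (enable 4 VFs)
      *   echo 0 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs   (disable VFs)
      */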
5681
5682 static const struct pci_error_handlers be_eeh_handlers = {
5683         .error_detected = be_eeh_err_detected,
5684         .slot_reset = be_eeh_reset,
5685         .resume = be_eeh_resume,
5686 };
5687
5688 static struct pci_driver be_driver = {
5689         .name = DRV_NAME,
5690         .id_table = be_dev_ids,
5691         .probe = be_probe,
5692         .remove = be_remove,
5693         .suspend = be_suspend,
5694         .resume = be_pci_resume,
5695         .shutdown = be_shutdown,
5696         .sriov_configure = be_pci_sriov_configure,
5697         .err_handler = &be_eeh_handlers
5698 };
5699
5700 static int __init be_init_module(void)
5701 {
5702         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
5703             rx_frag_size != 2048) {
5704                 printk(KERN_WARNING DRV_NAME
5705                         " : Module param rx_frag_size must be 2048/4096/8192."
5706                         " Using 2048\n");
5707                 rx_frag_size = 2048;
5708         }
5709
5710         if (num_vfs > 0) {
5711                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
5712                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
5713         }
5714
5715         return pci_register_driver(&be_driver);
5716 }
5717 module_init(be_init_module);
5718
5719 static void __exit be_exit_module(void)
5720 {
5721         pci_unregister_driver(&be_driver);
5722 }
5723 module_exit(be_exit_module);