1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
90         /* required last entry */
91         {0, }
92 };
93
94 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
95
96 void igb_reset(struct igb_adapter *);
97 static int igb_setup_all_tx_resources(struct igb_adapter *);
98 static int igb_setup_all_rx_resources(struct igb_adapter *);
99 static void igb_free_all_tx_resources(struct igb_adapter *);
100 static void igb_free_all_rx_resources(struct igb_adapter *);
101 static void igb_setup_mrqc(struct igb_adapter *);
102 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
103 static void __devexit igb_remove(struct pci_dev *pdev);
104 static int igb_sw_init(struct igb_adapter *);
105 static int igb_open(struct net_device *);
106 static int igb_close(struct net_device *);
107 static void igb_configure_tx(struct igb_adapter *);
108 static void igb_configure_rx(struct igb_adapter *);
109 static void igb_clean_all_tx_rings(struct igb_adapter *);
110 static void igb_clean_all_rx_rings(struct igb_adapter *);
111 static void igb_clean_tx_ring(struct igb_ring *);
112 static void igb_clean_rx_ring(struct igb_ring *);
113 static void igb_set_rx_mode(struct net_device *);
114 static void igb_update_phy_info(unsigned long);
115 static void igb_watchdog(unsigned long);
116 static void igb_watchdog_task(struct work_struct *);
117 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
118 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
119                                                  struct rtnl_link_stats64 *stats);
120 static int igb_change_mtu(struct net_device *, int);
121 static int igb_set_mac(struct net_device *, void *);
122 static void igb_set_uta(struct igb_adapter *adapter);
123 static irqreturn_t igb_intr(int irq, void *);
124 static irqreturn_t igb_intr_msi(int irq, void *);
125 static irqreturn_t igb_msix_other(int irq, void *);
126 static irqreturn_t igb_msix_ring(int irq, void *);
127 #ifdef CONFIG_IGB_DCA
128 static void igb_update_dca(struct igb_q_vector *);
129 static void igb_setup_dca(struct igb_adapter *);
130 #endif /* CONFIG_IGB_DCA */
131 static bool igb_clean_tx_irq(struct igb_q_vector *);
132 static int igb_poll(struct napi_struct *, int);
133 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
134 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
135 static void igb_tx_timeout(struct net_device *);
136 static void igb_reset_task(struct work_struct *);
137 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
138 static void igb_vlan_rx_add_vid(struct net_device *, u16);
139 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
140 static void igb_restore_vlan(struct igb_adapter *);
141 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
142 static void igb_ping_all_vfs(struct igb_adapter *);
143 static void igb_msg_task(struct igb_adapter *);
144 static void igb_vmm_control(struct igb_adapter *);
145 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
146 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
147 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
148 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
149                                int vf, u16 vlan, u8 qos);
150 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
151 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
152                                  struct ifla_vf_info *ivi);
153
154 #ifdef CONFIG_PM
155 static int igb_suspend(struct pci_dev *, pm_message_t);
156 static int igb_resume(struct pci_dev *);
157 #endif
158 static void igb_shutdown(struct pci_dev *);
159 #ifdef CONFIG_IGB_DCA
160 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
161 static struct notifier_block dca_notifier = {
162         .notifier_call  = igb_notify_dca,
163         .next           = NULL,
164         .priority       = 0
165 };
166 #endif
167 #ifdef CONFIG_NET_POLL_CONTROLLER
168 /* for netdump / net console */
169 static void igb_netpoll(struct net_device *);
170 #endif
171 #ifdef CONFIG_PCI_IOV
172 static unsigned int max_vfs = 0;
173 module_param(max_vfs, uint, 0);
174 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
175                  "per physical function");
176 #endif /* CONFIG_PCI_IOV */
177
178 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
179                      pci_channel_state_t);
180 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
181 static void igb_io_resume(struct pci_dev *);
182
183 static struct pci_error_handlers igb_err_handler = {
184         .error_detected = igb_io_error_detected,
185         .slot_reset = igb_io_slot_reset,
186         .resume = igb_io_resume,
187 };
188
189
190 static struct pci_driver igb_driver = {
191         .name     = igb_driver_name,
192         .id_table = igb_pci_tbl,
193         .probe    = igb_probe,
194         .remove   = __devexit_p(igb_remove),
195 #ifdef CONFIG_PM
196         /* Power Management Hooks */
197         .suspend  = igb_suspend,
198         .resume   = igb_resume,
199 #endif
200         .shutdown = igb_shutdown,
201         .err_handler = &igb_err_handler
202 };
203
204 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
205 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
206 MODULE_LICENSE("GPL");
207 MODULE_VERSION(DRV_VERSION);
208
209 struct igb_reg_info {
210         u32 ofs;
211         char *name;
212 };
213
214 static const struct igb_reg_info igb_reg_info_tbl[] = {
215
216         /* General Registers */
217         {E1000_CTRL, "CTRL"},
218         {E1000_STATUS, "STATUS"},
219         {E1000_CTRL_EXT, "CTRL_EXT"},
220
221         /* Interrupt Registers */
222         {E1000_ICR, "ICR"},
223
224         /* RX Registers */
225         {E1000_RCTL, "RCTL"},
226         {E1000_RDLEN(0), "RDLEN"},
227         {E1000_RDH(0), "RDH"},
228         {E1000_RDT(0), "RDT"},
229         {E1000_RXDCTL(0), "RXDCTL"},
230         {E1000_RDBAL(0), "RDBAL"},
231         {E1000_RDBAH(0), "RDBAH"},
232
233         /* TX Registers */
234         {E1000_TCTL, "TCTL"},
235         {E1000_TDBAL(0), "TDBAL"},
236         {E1000_TDBAH(0), "TDBAH"},
237         {E1000_TDLEN(0), "TDLEN"},
238         {E1000_TDH(0), "TDH"},
239         {E1000_TDT(0), "TDT"},
240         {E1000_TXDCTL(0), "TXDCTL"},
241         {E1000_TDFH, "TDFH"},
242         {E1000_TDFT, "TDFT"},
243         {E1000_TDFHS, "TDFHS"},
244         {E1000_TDFPC, "TDFPC"},
245
246         /* List Terminator */
247         {}
248 };
249
250 /*
251  * igb_regdump - register printout routine
252  */
253 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
254 {
255         int n = 0;
256         char rname[16];
257         u32 regs[8];
258
259         switch (reginfo->ofs) {
260         case E1000_RDLEN(0):
261                 for (n = 0; n < 4; n++)
262                         regs[n] = rd32(E1000_RDLEN(n));
263                 break;
264         case E1000_RDH(0):
265                 for (n = 0; n < 4; n++)
266                         regs[n] = rd32(E1000_RDH(n));
267                 break;
268         case E1000_RDT(0):
269                 for (n = 0; n < 4; n++)
270                         regs[n] = rd32(E1000_RDT(n));
271                 break;
272         case E1000_RXDCTL(0):
273                 for (n = 0; n < 4; n++)
274                         regs[n] = rd32(E1000_RXDCTL(n));
275                 break;
276         case E1000_RDBAL(0):
277                 for (n = 0; n < 4; n++)
278                         regs[n] = rd32(E1000_RDBAL(n));
279                 break;
280         case E1000_RDBAH(0):
281                 for (n = 0; n < 4; n++)
282                         regs[n] = rd32(E1000_RDBAH(n));
283                 break;
284         case E1000_TDBAL(0):
285                 for (n = 0; n < 4; n++)
286                         regs[n] = rd32(E1000_TDBAL(n));
287                 break;
288         case E1000_TDBAH(0):
289                 for (n = 0; n < 4; n++)
290                         regs[n] = rd32(E1000_TDBAH(n));
291                 break;
292         case E1000_TDLEN(0):
293                 for (n = 0; n < 4; n++)
294                         regs[n] = rd32(E1000_TDLEN(n));
295                 break;
296         case E1000_TDH(0):
297                 for (n = 0; n < 4; n++)
298                         regs[n] = rd32(E1000_TDH(n));
299                 break;
300         case E1000_TDT(0):
301                 for (n = 0; n < 4; n++)
302                         regs[n] = rd32(E1000_TDT(n));
303                 break;
304         case E1000_TXDCTL(0):
305                 for (n = 0; n < 4; n++)
306                         regs[n] = rd32(E1000_TXDCTL(n));
307                 break;
308         default:
309                 printk(KERN_INFO "%-15s %08x\n",
310                         reginfo->name, rd32(reginfo->ofs));
311                 return;
312         }
313
314         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
315         printk(KERN_INFO "%-15s ", rname);
316         for (n = 0; n < 4; n++)
317                 printk(KERN_CONT "%08x ", regs[n]);
318         printk(KERN_CONT "\n");
319 }
320
321 /*
322  * igb_dump - Print registers, tx-rings and rx-rings
323  */
324 static void igb_dump(struct igb_adapter *adapter)
325 {
326         struct net_device *netdev = adapter->netdev;
327         struct e1000_hw *hw = &adapter->hw;
328         struct igb_reg_info *reginfo;
329         int n = 0;
330         struct igb_ring *tx_ring;
331         union e1000_adv_tx_desc *tx_desc;
332         struct my_u0 { u64 a; u64 b; } *u0;
333         struct igb_buffer *buffer_info;
334         struct igb_ring *rx_ring;
335         union e1000_adv_rx_desc *rx_desc;
336         u32 staterr;
337         int i = 0;
338
339         if (!netif_msg_hw(adapter))
340                 return;
341
342         /* Print netdevice Info */
343         if (netdev) {
344                 dev_info(&adapter->pdev->dev, "Net device Info\n");
345                 printk(KERN_INFO "Device Name     state            "
346                         "trans_start      last_rx\n");
347                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
348                 netdev->name,
349                 netdev->state,
350                 netdev->trans_start,
351                 netdev->last_rx);
352         }
353
354         /* Print Registers */
355         dev_info(&adapter->pdev->dev, "Register Dump\n");
356         printk(KERN_INFO " Register Name   Value\n");
357         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
358              reginfo->name; reginfo++) {
359                 igb_regdump(hw, reginfo);
360         }
361
362         /* Print TX Ring Summary */
363         if (!netdev || !netif_running(netdev))
364                 goto exit;
365
366         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
367         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
368                 " leng ntw timestamp\n");
369         for (n = 0; n < adapter->num_tx_queues; n++) {
370                 tx_ring = adapter->tx_ring[n];
371                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
372                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
373                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
374                            (u64)buffer_info->dma,
375                            buffer_info->length,
376                            buffer_info->next_to_watch,
377                            (u64)buffer_info->time_stamp);
378         }
379
380         /* Print TX Rings */
381         if (!netif_msg_tx_done(adapter))
382                 goto rx_ring_summary;
383
384         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
385
386         /* Transmit Descriptor Formats
387          *
388          * Advanced Transmit Descriptor
389          *   +--------------------------------------------------------------+
390          * 0 |         Buffer Address [63:0]                                |
391          *   +--------------------------------------------------------------+
392          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
393          *   +--------------------------------------------------------------+
394          *   63      46 45    40 39 38 36 35 32 31   24             15       0
395          */
396
397         for (n = 0; n < adapter->num_tx_queues; n++) {
398                 tx_ring = adapter->tx_ring[n];
399                 printk(KERN_INFO "------------------------------------\n");
400                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
401                 printk(KERN_INFO "------------------------------------\n");
402                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
403                         "[PlPOCIStDDM Ln] [bi->dma       ] "
404                         "leng  ntw timestamp        bi->skb\n");
405
406                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
407                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
408                         buffer_info = &tx_ring->buffer_info[i];
409                         u0 = (struct my_u0 *)tx_desc;
410                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
411                                 " %04X  %3X %016llX %p", i,
412                                 le64_to_cpu(u0->a),
413                                 le64_to_cpu(u0->b),
414                                 (u64)buffer_info->dma,
415                                 buffer_info->length,
416                                 buffer_info->next_to_watch,
417                                 (u64)buffer_info->time_stamp,
418                                 buffer_info->skb);
419                         if (i == tx_ring->next_to_use &&
420                                 i == tx_ring->next_to_clean)
421                                 printk(KERN_CONT " NTC/U\n");
422                         else if (i == tx_ring->next_to_use)
423                                 printk(KERN_CONT " NTU\n");
424                         else if (i == tx_ring->next_to_clean)
425                                 printk(KERN_CONT " NTC\n");
426                         else
427                                 printk(KERN_CONT "\n");
428
429                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
430                                 print_hex_dump(KERN_INFO, "",
431                                         DUMP_PREFIX_ADDRESS,
432                                         16, 1, phys_to_virt(buffer_info->dma),
433                                         buffer_info->length, true);
434                 }
435         }
436
437         /* Print RX Rings Summary */
438 rx_ring_summary:
439         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
440         printk(KERN_INFO "Queue [NTU] [NTC]\n");
441         for (n = 0; n < adapter->num_rx_queues; n++) {
442                 rx_ring = adapter->rx_ring[n];
443                 printk(KERN_INFO " %5d %5X %5X\n", n,
444                            rx_ring->next_to_use, rx_ring->next_to_clean);
445         }
446
447         /* Print RX Rings */
448         if (!netif_msg_rx_status(adapter))
449                 goto exit;
450
451         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
452
453         /* Advanced Receive Descriptor (Read) Format
454          *    63                                           1        0
455          *    +-----------------------------------------------------+
456          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
457          *    +----------------------------------------------+------+
458          *  8 |       Header Buffer Address [63:1]           |  DD  |
459          *    +-----------------------------------------------------+
460          *
461          *
462          * Advanced Receive Descriptor (Write-Back) Format
463          *
464          *   63       48 47    32 31  30      21 20 17 16   4 3     0
465          *   +------------------------------------------------------+
466          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
467          *   | Checksum   Ident  |   |           |    | Type | Type |
468          *   +------------------------------------------------------+
469          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
470          *   +------------------------------------------------------+
471          *   63       48 47    32 31            20 19               0
472          */
473
474         for (n = 0; n < adapter->num_rx_queues; n++) {
475                 rx_ring = adapter->rx_ring[n];
476                 printk(KERN_INFO "------------------------------------\n");
477                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
478                 printk(KERN_INFO "------------------------------------\n");
479                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
480                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
481                         "<-- Adv Rx Read format\n");
482                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
483                         "[vl er S cks ln] ---------------- [bi->skb] "
484                         "<-- Adv Rx Write-Back format\n");
485
486                 for (i = 0; i < rx_ring->count; i++) {
487                         buffer_info = &rx_ring->buffer_info[i];
488                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
489                         u0 = (struct my_u0 *)rx_desc;
490                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
491                         if (staterr & E1000_RXD_STAT_DD) {
492                                 /* Descriptor Done */
493                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
494                                         "%016llX ---------------- %p", i,
495                                         le64_to_cpu(u0->a),
496                                         le64_to_cpu(u0->b),
497                                         buffer_info->skb);
498                         } else {
499                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
500                                         "%016llX %016llX %p", i,
501                                         le64_to_cpu(u0->a),
502                                         le64_to_cpu(u0->b),
503                                         (u64)buffer_info->dma,
504                                         buffer_info->skb);
505
506                                 if (netif_msg_pktdata(adapter)) {
507                                         print_hex_dump(KERN_INFO, "",
508                                                 DUMP_PREFIX_ADDRESS,
509                                                 16, 1,
510                                                 phys_to_virt(buffer_info->dma),
511                                                 rx_ring->rx_buffer_len, true);
512                                         if (rx_ring->rx_buffer_len
513                                                 < IGB_RXBUFFER_1024)
514                                                 print_hex_dump(KERN_INFO, "",
515                                                   DUMP_PREFIX_ADDRESS,
516                                                   16, 1,
517                                                   phys_to_virt(
518                                                     buffer_info->page_dma +
519                                                     buffer_info->page_offset),
520                                                   PAGE_SIZE/2, true);
521                                 }
522                         }
523
524                         if (i == rx_ring->next_to_use)
525                                 printk(KERN_CONT " NTU\n");
526                         else if (i == rx_ring->next_to_clean)
527                                 printk(KERN_CONT " NTC\n");
528                         else
529                                 printk(KERN_CONT "\n");
530
531                 }
532         }
533
534 exit:
535         return;
536 }
537
538
539 /**
540  * igb_read_clock - read raw cycle counter (to be used by time counter)
541  */
542 static cycle_t igb_read_clock(const struct cyclecounter *tc)
543 {
544         struct igb_adapter *adapter =
545                 container_of(tc, struct igb_adapter, cycles);
546         struct e1000_hw *hw = &adapter->hw;
547         u64 stamp = 0;
548         int shift = 0;
549
550         /*
551          * The timestamp latches on lowest register read. For the 82580
552          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
553          * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
554          */
555         if (hw->mac.type == e1000_82580) {
556                 stamp = rd32(E1000_SYSTIMR) >> 8;
557                 shift = IGB_82580_TSYNC_SHIFT;
558         }
559
560         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
561         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
562         return stamp;
563 }
564
565 /**
566  * igb_get_hw_dev - return device
567  * used by hardware layer to print debugging information
568  **/
569 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
570 {
571         struct igb_adapter *adapter = hw->back;
572         return adapter->netdev;
573 }
574
575 /**
576  * igb_init_module - Driver Registration Routine
577  *
578  * igb_init_module is the first routine called when the driver is
579  * loaded. All it does is register with the PCI subsystem.
580  **/
581 static int __init igb_init_module(void)
582 {
583         int ret;
584         printk(KERN_INFO "%s - version %s\n",
585                igb_driver_string, igb_driver_version);
586
587         printk(KERN_INFO "%s\n", igb_copyright);
588
589 #ifdef CONFIG_IGB_DCA
590         dca_register_notify(&dca_notifier);
591 #endif
592         ret = pci_register_driver(&igb_driver);
593         return ret;
594 }
595
596 module_init(igb_init_module);
597
598 /**
599  * igb_exit_module - Driver Exit Cleanup Routine
600  *
601  * igb_exit_module is called just before the driver is removed
602  * from memory.
603  **/
604 static void __exit igb_exit_module(void)
605 {
606 #ifdef CONFIG_IGB_DCA
607         dca_unregister_notify(&dca_notifier);
608 #endif
609         pci_unregister_driver(&igb_driver);
610 }
611
612 module_exit(igb_exit_module);
613
614 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
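/*
 * Q_IDX_82576(i) expands to the interleaved sequence 0, 8, 1, 9, 2, 10, ...
 * for i = 0, 1, 2, 3, 4, 5, ...; igb_cache_ring_register() below adds the
 * VF-based offset to this value when mapping PF rings to register indices.
 */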
615 /**
616  * igb_cache_ring_register - Descriptor ring to register mapping
617  * @adapter: board private structure to initialize
618  *
619  * Once we know the feature-set enabled for the device, we'll cache
620  * the register offset the descriptor ring is assigned to.
621  **/
622 static void igb_cache_ring_register(struct igb_adapter *adapter)
623 {
624         int i = 0, j = 0;
625         u32 rbase_offset = adapter->vfs_allocated_count;
626
627         switch (adapter->hw.mac.type) {
628         case e1000_82576:
629                 /* The queues are allocated for virtualization such that VF 0
630                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
631                  * In order to avoid collision we start at the first free queue
632                  * and continue consuming queues in the same sequence
633                  */
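                /*
                 * Worked example: with 2 VFs, rbase_offset is 2, so PF rx
                 * rings 0, 1, 2, 3 land on register indices 2, 10, 3, 11
                 * (2 + Q_IDX_82576(i)), skipping the queues owned by the VFs.
                 */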
634                 if (adapter->vfs_allocated_count) {
635                         for (; i < adapter->rss_queues; i++)
636                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
637                                                                Q_IDX_82576(i);
638                 }
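                /* fall through - remaining rings are mapped below */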
639         case e1000_82575:
640         case e1000_82580:
641         case e1000_i350:
642         default:
643                 for (; i < adapter->num_rx_queues; i++)
644                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
645                 for (; j < adapter->num_tx_queues; j++)
646                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
647                 break;
648         }
649 }
650
651 static void igb_free_queues(struct igb_adapter *adapter)
652 {
653         int i;
654
655         for (i = 0; i < adapter->num_tx_queues; i++) {
656                 kfree(adapter->tx_ring[i]);
657                 adapter->tx_ring[i] = NULL;
658         }
659         for (i = 0; i < adapter->num_rx_queues; i++) {
660                 kfree(adapter->rx_ring[i]);
661                 adapter->rx_ring[i] = NULL;
662         }
663         adapter->num_rx_queues = 0;
664         adapter->num_tx_queues = 0;
665 }
666
667 /**
668  * igb_alloc_queues - Allocate memory for all rings
669  * @adapter: board private structure to initialize
670  *
671  * We allocate one ring per queue at run-time since we don't know the
672  * number of queues at compile-time.
673  **/
674 static int igb_alloc_queues(struct igb_adapter *adapter)
675 {
676         struct igb_ring *ring;
677         int i;
678
679         for (i = 0; i < adapter->num_tx_queues; i++) {
680                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
681                 if (!ring)
682                         goto err;
683                 ring->count = adapter->tx_ring_count;
684                 ring->queue_index = i;
685                 ring->dev = &adapter->pdev->dev;
686                 ring->netdev = adapter->netdev;
687                 /* For 82575, context index must be unique per ring. */
688                 if (adapter->hw.mac.type == e1000_82575)
689                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
690                 adapter->tx_ring[i] = ring;
691         }
692
693         for (i = 0; i < adapter->num_rx_queues; i++) {
694                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
695                 if (!ring)
696                         goto err;
697                 ring->count = adapter->rx_ring_count;
698                 ring->queue_index = i;
699                 ring->dev = &adapter->pdev->dev;
700                 ring->netdev = adapter->netdev;
701                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
702                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
703                 /* set flag indicating ring supports SCTP checksum offload */
704                 if (adapter->hw.mac.type >= e1000_82576)
705                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
706                 adapter->rx_ring[i] = ring;
707         }
708
709         igb_cache_ring_register(adapter);
710
711         return 0;
712
713 err:
714         igb_free_queues(adapter);
715
716         return -ENOMEM;
717 }
718
719 #define IGB_N0_QUEUE -1
720 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
721 {
722         u32 msixbm = 0;
723         struct igb_adapter *adapter = q_vector->adapter;
724         struct e1000_hw *hw = &adapter->hw;
725         u32 ivar, index;
726         int rx_queue = IGB_N0_QUEUE;
727         int tx_queue = IGB_N0_QUEUE;
728
729         if (q_vector->rx_ring)
730                 rx_queue = q_vector->rx_ring->reg_idx;
731         if (q_vector->tx_ring)
732                 tx_queue = q_vector->tx_ring->reg_idx;
733
734         switch (hw->mac.type) {
735         case e1000_82575:
736                 /* The 82575 assigns vectors using a bitmask, which matches the
737                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
738                    or more queues to a vector, we write the appropriate bits
739                    into the MSIXBM register for that vector. */
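                /*
                 * For example, a vector serving rx queue 1 and tx queue 1
                 * gets (E1000_EICR_RX_QUEUE0 << 1) | (E1000_EICR_TX_QUEUE0 << 1)
                 * written into its MSIXBM entry.
                 */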
740                 if (rx_queue > IGB_N0_QUEUE)
741                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
742                 if (tx_queue > IGB_N0_QUEUE)
743                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
744                 if (!adapter->msix_entries && msix_vector == 0)
745                         msixbm |= E1000_EIMS_OTHER;
746                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
747                 q_vector->eims_value = msixbm;
748                 break;
749         case e1000_82576:
750                 /* 82576 uses a table-based method for assigning vectors.
751                    Each queue has a single entry in the table to which we write
752                    a vector number along with a "valid" bit.  Sadly, the layout
753                    of the table is somewhat counterintuitive. */
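                /*
                 * Resulting IVAR0[queue & 0x7] byte layout on 82576:
                 *   byte 0 - rx queue N,     byte 1 - tx queue N,
                 *   byte 2 - rx queue N + 8, byte 3 - tx queue N + 8
                 */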
754                 if (rx_queue > IGB_N0_QUEUE) {
755                         index = (rx_queue & 0x7);
756                         ivar = array_rd32(E1000_IVAR0, index);
757                         if (rx_queue < 8) {
758                                 /* vector goes into low byte of register */
759                                 ivar = ivar & 0xFFFFFF00;
760                                 ivar |= msix_vector | E1000_IVAR_VALID;
761                         } else {
762                                 /* vector goes into third byte of register */
763                                 ivar = ivar & 0xFF00FFFF;
764                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
765                         }
766                         array_wr32(E1000_IVAR0, index, ivar);
767                 }
768                 if (tx_queue > IGB_N0_QUEUE) {
769                         index = (tx_queue & 0x7);
770                         ivar = array_rd32(E1000_IVAR0, index);
771                         if (tx_queue < 8) {
772                                 /* vector goes into second byte of register */
773                                 ivar = ivar & 0xFFFF00FF;
774                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
775                         } else {
776                                 /* vector goes into high byte of register */
777                                 ivar = ivar & 0x00FFFFFF;
778                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
779                         }
780                         array_wr32(E1000_IVAR0, index, ivar);
781                 }
782                 q_vector->eims_value = 1 << msix_vector;
783                 break;
784         case e1000_82580:
785         case e1000_i350:
786                 /* 82580 uses the same table-based approach as 82576 but has fewer
787                    entries; as a result we carry over for queues greater than 4. */
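                /*
                 * Resulting IVAR0[queue >> 1] byte layout on 82580/i350:
                 *   even queues - byte 0 (rx) and byte 1 (tx),
                 *   odd queues  - byte 2 (rx) and byte 3 (tx)
                 */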
788                 if (rx_queue > IGB_N0_QUEUE) {
789                         index = (rx_queue >> 1);
790                         ivar = array_rd32(E1000_IVAR0, index);
791                         if (rx_queue & 0x1) {
792                                 /* vector goes into third byte of register */
793                                 ivar = ivar & 0xFF00FFFF;
794                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
795                         } else {
796                                 /* vector goes into low byte of register */
797                                 ivar = ivar & 0xFFFFFF00;
798                                 ivar |= msix_vector | E1000_IVAR_VALID;
799                         }
800                         array_wr32(E1000_IVAR0, index, ivar);
801                 }
802                 if (tx_queue > IGB_N0_QUEUE) {
803                         index = (tx_queue >> 1);
804                         ivar = array_rd32(E1000_IVAR0, index);
805                         if (tx_queue & 0x1) {
806                                 /* vector goes into high byte of register */
807                                 ivar = ivar & 0x00FFFFFF;
808                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
809                         } else {
810                                 /* vector goes into second byte of register */
811                                 ivar = ivar & 0xFFFF00FF;
812                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
813                         }
814                         array_wr32(E1000_IVAR0, index, ivar);
815                 }
816                 q_vector->eims_value = 1 << msix_vector;
817                 break;
818         default:
819                 BUG();
820                 break;
821         }
822
823         /* add q_vector eims value to global eims_enable_mask */
824         adapter->eims_enable_mask |= q_vector->eims_value;
825
826         /* configure q_vector to set itr on first interrupt */
827         q_vector->set_itr = 1;
828 }
829
830 /**
831  * igb_configure_msix - Configure MSI-X hardware
832  *
833  * igb_configure_msix sets up the hardware to properly
834  * generate MSI-X interrupts.
835  **/
836 static void igb_configure_msix(struct igb_adapter *adapter)
837 {
838         u32 tmp;
839         int i, vector = 0;
840         struct e1000_hw *hw = &adapter->hw;
841
842         adapter->eims_enable_mask = 0;
843
844         /* set vector for other causes, i.e. link changes */
845         switch (hw->mac.type) {
846         case e1000_82575:
847                 tmp = rd32(E1000_CTRL_EXT);
848                 /* enable MSI-X PBA support */
849                 tmp |= E1000_CTRL_EXT_PBA_CLR;
850
851                 /* Auto-Mask interrupts upon ICR read. */
852                 tmp |= E1000_CTRL_EXT_EIAME;
853                 tmp |= E1000_CTRL_EXT_IRCA;
854
855                 wr32(E1000_CTRL_EXT, tmp);
856
857                 /* enable msix_other interrupt */
858                 array_wr32(E1000_MSIXBM(0), vector++,
859                                       E1000_EIMS_OTHER);
860                 adapter->eims_other = E1000_EIMS_OTHER;
861
862                 break;
863
864         case e1000_82576:
865         case e1000_82580:
866         case e1000_i350:
867                 /* Turn on MSI-X capability first, or our settings
868                  * won't stick.  And it will take days to debug. */
869                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
870                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
871                                 E1000_GPIE_NSICR);
872
873                 /* enable msix_other interrupt */
874                 adapter->eims_other = 1 << vector;
875                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
876
877                 wr32(E1000_IVAR_MISC, tmp);
878                 break;
879         default:
880                 /* do nothing, since nothing else supports MSI-X */
881                 break;
882         } /* switch (hw->mac.type) */
883
884         adapter->eims_enable_mask |= adapter->eims_other;
885
886         for (i = 0; i < adapter->num_q_vectors; i++)
887                 igb_assign_vector(adapter->q_vector[i], vector++);
888
889         wrfl();
890 }
891
892 /**
893  * igb_request_msix - Initialize MSI-X interrupts
894  *
895  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
896  * kernel.
897  **/
898 static int igb_request_msix(struct igb_adapter *adapter)
899 {
900         struct net_device *netdev = adapter->netdev;
901         struct e1000_hw *hw = &adapter->hw;
902         int i, err = 0, vector = 0;
903
904         err = request_irq(adapter->msix_entries[vector].vector,
905                           igb_msix_other, 0, netdev->name, adapter);
906         if (err)
907                 goto out;
908         vector++;
909
910         for (i = 0; i < adapter->num_q_vectors; i++) {
911                 struct igb_q_vector *q_vector = adapter->q_vector[i];
912
913                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
914
915                 if (q_vector->rx_ring && q_vector->tx_ring)
916                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
917                                 q_vector->rx_ring->queue_index);
918                 else if (q_vector->tx_ring)
919                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
920                                 q_vector->tx_ring->queue_index);
921                 else if (q_vector->rx_ring)
922                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
923                                 q_vector->rx_ring->queue_index);
924                 else
925                         sprintf(q_vector->name, "%s-unused", netdev->name);
926
927                 err = request_irq(adapter->msix_entries[vector].vector,
928                                   igb_msix_ring, 0, q_vector->name,
929                                   q_vector);
930                 if (err)
931                         goto out;
932                 vector++;
933         }
934
935         igb_configure_msix(adapter);
936         return 0;
937 out:
938         return err;
939 }
940
941 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
942 {
943         if (adapter->msix_entries) {
944                 pci_disable_msix(adapter->pdev);
945                 kfree(adapter->msix_entries);
946                 adapter->msix_entries = NULL;
947         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
948                 pci_disable_msi(adapter->pdev);
949         }
950 }
951
952 /**
953  * igb_free_q_vectors - Free memory allocated for interrupt vectors
954  * @adapter: board private structure to initialize
955  *
956  * This function frees the memory allocated to the q_vectors.  In addition if
957  * NAPI is enabled it will delete any references to the NAPI struct prior
958  * to freeing the q_vector.
959  **/
960 static void igb_free_q_vectors(struct igb_adapter *adapter)
961 {
962         int v_idx;
963
964         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
965                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
966                 adapter->q_vector[v_idx] = NULL;
967                 if (!q_vector)
968                         continue;
969                 netif_napi_del(&q_vector->napi);
970                 kfree(q_vector);
971         }
972         adapter->num_q_vectors = 0;
973 }
974
975 /**
976  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
977  *
978  * This function resets the device so that it has 0 rx queues, tx queues, and
979  * MSI-X interrupts allocated.
980  */
981 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
982 {
983         igb_free_queues(adapter);
984         igb_free_q_vectors(adapter);
985         igb_reset_interrupt_capability(adapter);
986 }
987
988 /**
989  * igb_set_interrupt_capability - set MSI or MSI-X if supported
990  *
991  * Attempt to configure interrupts using the best available
992  * capabilities of the hardware and kernel.
993  **/
994 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
995 {
996         int err;
997         int numvecs, i;
998
999         /* Number of supported queues. */
1000         adapter->num_rx_queues = adapter->rss_queues;
1001         if (adapter->vfs_allocated_count)
1002                 adapter->num_tx_queues = 1;
1003         else
1004                 adapter->num_tx_queues = adapter->rss_queues;
1005
1006         /* start with one vector for every rx queue */
1007         numvecs = adapter->num_rx_queues;
1008
1009         /* if tx handler is separate add 1 for every tx queue */
1010         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1011                 numvecs += adapter->num_tx_queues;
1012
1013         /* store the number of vectors reserved for queues */
1014         adapter->num_q_vectors = numvecs;
1015
1016         /* add 1 vector for link status interrupts */
1017         numvecs++;
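        /*
         * Example: with rss_queues = 4 and queue pairing disabled this
         * requests 4 rx + 4 tx + 1 link = 9 vectors; with pairing enabled
         * it requests 4 + 1 = 5.
         */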
1018         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1019                                         GFP_KERNEL);
1020         if (!adapter->msix_entries)
1021                 goto msi_only;
1022
1023         for (i = 0; i < numvecs; i++)
1024                 adapter->msix_entries[i].entry = i;
1025
1026         err = pci_enable_msix(adapter->pdev,
1027                               adapter->msix_entries,
1028                               numvecs);
1029         if (err == 0)
1030                 goto out;
1031
1032         igb_reset_interrupt_capability(adapter);
1033
1034         /* If we can't do MSI-X, try MSI */
1035 msi_only:
1036 #ifdef CONFIG_PCI_IOV
1037         /* disable SR-IOV for non MSI-X configurations */
1038         if (adapter->vf_data) {
1039                 struct e1000_hw *hw = &adapter->hw;
1040                 /* disable iov and allow time for transactions to clear */
1041                 pci_disable_sriov(adapter->pdev);
1042                 msleep(500);
1043
1044                 kfree(adapter->vf_data);
1045                 adapter->vf_data = NULL;
1046                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1047                 msleep(100);
1048                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1049         }
1050 #endif
1051         adapter->vfs_allocated_count = 0;
1052         adapter->rss_queues = 1;
1053         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1054         adapter->num_rx_queues = 1;
1055         adapter->num_tx_queues = 1;
1056         adapter->num_q_vectors = 1;
1057         if (!pci_enable_msi(adapter->pdev))
1058                 adapter->flags |= IGB_FLAG_HAS_MSI;
1059 out:
1060         /* Notify the stack of the (possibly) reduced queue counts. */
1061         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1062         return netif_set_real_num_rx_queues(adapter->netdev,
1063                                             adapter->num_rx_queues);
1064 }
1065
1066 /**
1067  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1068  * @adapter: board private structure to initialize
1069  *
1070  * We allocate one q_vector per queue interrupt.  If allocation fails we
1071  * return -ENOMEM.
1072  **/
1073 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1074 {
1075         struct igb_q_vector *q_vector;
1076         struct e1000_hw *hw = &adapter->hw;
1077         int v_idx;
1078
1079         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1080                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1081                 if (!q_vector)
1082                         goto err_out;
1083                 q_vector->adapter = adapter;
1084                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1085                 q_vector->itr_val = IGB_START_ITR;
1086                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1087                 adapter->q_vector[v_idx] = q_vector;
1088         }
1089         return 0;
1090
1091 err_out:
1092         igb_free_q_vectors(adapter);
1093         return -ENOMEM;
1094 }
1095
1096 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1097                                       int ring_idx, int v_idx)
1098 {
1099         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1100
1101         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1102         q_vector->rx_ring->q_vector = q_vector;
1103         q_vector->itr_val = adapter->rx_itr_setting;
1104         if (q_vector->itr_val && q_vector->itr_val <= 3)
1105                 q_vector->itr_val = IGB_START_ITR;
1106 }
1107
1108 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1109                                       int ring_idx, int v_idx)
1110 {
1111         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1112
1113         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1114         q_vector->tx_ring->q_vector = q_vector;
1115         q_vector->itr_val = adapter->tx_itr_setting;
1116         if (q_vector->itr_val && q_vector->itr_val <= 3)
1117                 q_vector->itr_val = IGB_START_ITR;
1118 }
1119
1120 /**
1121  * igb_map_ring_to_vector - maps allocated queues to vectors
1122  *
1123  * This function maps the recently allocated queues to vectors.
1124  **/
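/*
 * Example: with 4 rx and 4 tx queues and 8 q_vectors, each ring gets its own
 * vector; with only 4 q_vectors, rx ring i and tx ring i share vector i.
 */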
1125 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1126 {
1127         int i;
1128         int v_idx = 0;
1129
1130         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1131             (adapter->num_q_vectors < adapter->num_tx_queues))
1132                 return -ENOMEM;
1133
1134         if (adapter->num_q_vectors >=
1135             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1136                 for (i = 0; i < adapter->num_rx_queues; i++)
1137                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1138                 for (i = 0; i < adapter->num_tx_queues; i++)
1139                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1140         } else {
1141                 for (i = 0; i < adapter->num_rx_queues; i++) {
1142                         if (i < adapter->num_tx_queues)
1143                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1144                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1145                 }
1146                 for (; i < adapter->num_tx_queues; i++)
1147                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1148         }
1149         return 0;
1150 }
1151
1152 /**
1153  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1154  *
1155  * This function initializes the interrupts and allocates all of the queues.
1156  **/
1157 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1158 {
1159         struct pci_dev *pdev = adapter->pdev;
1160         int err;
1161
1162         err = igb_set_interrupt_capability(adapter);
1163         if (err)
1164                 return err;
1165
1166         err = igb_alloc_q_vectors(adapter);
1167         if (err) {
1168                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1169                 goto err_alloc_q_vectors;
1170         }
1171
1172         err = igb_alloc_queues(adapter);
1173         if (err) {
1174                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1175                 goto err_alloc_queues;
1176         }
1177
1178         err = igb_map_ring_to_vector(adapter);
1179         if (err) {
1180                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1181                 goto err_map_queues;
1182         }
1183
1184
1185         return 0;
1186 err_map_queues:
1187         igb_free_queues(adapter);
1188 err_alloc_queues:
1189         igb_free_q_vectors(adapter);
1190 err_alloc_q_vectors:
1191         igb_reset_interrupt_capability(adapter);
1192         return err;
1193 }
1194
1195 /**
1196  * igb_request_irq - initialize interrupts
1197  *
1198  * Attempts to configure interrupts using the best available
1199  * capabilities of the hardware and kernel.
1200  **/
1201 static int igb_request_irq(struct igb_adapter *adapter)
1202 {
1203         struct net_device *netdev = adapter->netdev;
1204         struct pci_dev *pdev = adapter->pdev;
1205         int err = 0;
1206
1207         if (adapter->msix_entries) {
1208                 err = igb_request_msix(adapter);
1209                 if (!err)
1210                         goto request_done;
1211                 /* fall back to MSI */
1212                 igb_clear_interrupt_scheme(adapter);
1213                 if (!pci_enable_msi(adapter->pdev))
1214                         adapter->flags |= IGB_FLAG_HAS_MSI;
1215                 igb_free_all_tx_resources(adapter);
1216                 igb_free_all_rx_resources(adapter);
1217                 adapter->num_tx_queues = 1;
1218                 adapter->num_rx_queues = 1;
1219                 adapter->num_q_vectors = 1;
1220                 err = igb_alloc_q_vectors(adapter);
1221                 if (err) {
1222                         dev_err(&pdev->dev,
1223                                 "Unable to allocate memory for vectors\n");
1224                         goto request_done;
1225                 }
1226                 err = igb_alloc_queues(adapter);
1227                 if (err) {
1228                         dev_err(&pdev->dev,
1229                                 "Unable to allocate memory for queues\n");
1230                         igb_free_q_vectors(adapter);
1231                         goto request_done;
1232                 }
1233                 igb_setup_all_tx_resources(adapter);
1234                 igb_setup_all_rx_resources(adapter);
1235         } else {
1236                 igb_assign_vector(adapter->q_vector[0], 0);
1237         }
1238
1239         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1240                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1241                                   netdev->name, adapter);
1242                 if (!err)
1243                         goto request_done;
1244
1245                 /* fall back to legacy interrupts */
1246                 igb_reset_interrupt_capability(adapter);
1247                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1248         }
1249
1250         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1251                           netdev->name, adapter);
1252
1253         if (err)
1254                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1255                         err);
1256
1257 request_done:
1258         return err;
1259 }
1260
1261 static void igb_free_irq(struct igb_adapter *adapter)
1262 {
1263         if (adapter->msix_entries) {
1264                 int vector = 0, i;
1265
1266                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1267
1268                 for (i = 0; i < adapter->num_q_vectors; i++) {
1269                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1270                         free_irq(adapter->msix_entries[vector++].vector,
1271                                  q_vector);
1272                 }
1273         } else {
1274                 free_irq(adapter->pdev->irq, adapter);
1275         }
1276 }
1277
1278 /**
1279  * igb_irq_disable - Mask off interrupt generation on the NIC
1280  * @adapter: board private structure
1281  **/
1282 static void igb_irq_disable(struct igb_adapter *adapter)
1283 {
1284         struct e1000_hw *hw = &adapter->hw;
1285
1286         /*
1287          * we need to be careful when disabling interrupts.  The VFs are also
1288          * mapped into these registers, so clearing the bits can cause
1289          * issues for the VF drivers; we only need to clear what we set
1290          */
1291         if (adapter->msix_entries) {
1292                 u32 regval = rd32(E1000_EIAM);
1293                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1294                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1295                 regval = rd32(E1000_EIAC);
1296                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1297         }
1298
1299         wr32(E1000_IAM, 0);
1300         wr32(E1000_IMC, ~0);
1301         wrfl();
1302         if (adapter->msix_entries) {
1303                 int i;
1304                 for (i = 0; i < adapter->num_q_vectors; i++)
1305                         synchronize_irq(adapter->msix_entries[i].vector);
1306         } else {
1307                 synchronize_irq(adapter->pdev->irq);
1308         }
1309 }
1310
1311 /**
1312  * igb_irq_enable - Enable default interrupt generation settings
1313  * @adapter: board private structure
1314  **/
1315 static void igb_irq_enable(struct igb_adapter *adapter)
1316 {
1317         struct e1000_hw *hw = &adapter->hw;
1318
1319         if (adapter->msix_entries) {
1320                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1321                 u32 regval = rd32(E1000_EIAC);
1322                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1323                 regval = rd32(E1000_EIAM);
1324                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1325                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1326                 if (adapter->vfs_allocated_count) {
1327                         wr32(E1000_MBVFIMR, 0xFF);
1328                         ims |= E1000_IMS_VMMB;
1329                 }
1330                 if (adapter->hw.mac.type == e1000_82580)
1331                         ims |= E1000_IMS_DRSTA;
1332
1333                 wr32(E1000_IMS, ims);
1334         } else {
1335                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1336                                 E1000_IMS_DRSTA);
1337                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1338                                 E1000_IMS_DRSTA);
1339         }
1340 }
1341
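/**
 * igb_update_mng_vlan - update the VLAN filter for the management VLAN
 * @adapter: board private structure
 *
 * Adds the VLAN id from the manageability DHCP cookie to the VLAN filter
 * table, and removes the previous management VLAN id when it has changed
 * and no VLAN device is registered for it.
 **/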
1342 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1343 {
1344         struct e1000_hw *hw = &adapter->hw;
1345         u16 vid = adapter->hw.mng_cookie.vlan_id;
1346         u16 old_vid = adapter->mng_vlan_id;
1347
1348         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1349                 /* add VID to filter table */
1350                 igb_vfta_set(hw, vid, true);
1351                 adapter->mng_vlan_id = vid;
1352         } else {
1353                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1354         }
1355
1356         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1357             (vid != old_vid) &&
1358             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1359                 /* remove VID from filter table */
1360                 igb_vfta_set(hw, old_vid, false);
1361         }
1362 }
1363
1364 /**
1365  * igb_release_hw_control - release control of the h/w to f/w
1366  * @adapter: address of board private structure
1367  *
1368  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1369  * For ASF and Pass Through versions of f/w this means that the
1370  * driver is no longer loaded.
1371  *
1372  **/
1373 static void igb_release_hw_control(struct igb_adapter *adapter)
1374 {
1375         struct e1000_hw *hw = &adapter->hw;
1376         u32 ctrl_ext;
1377
1378         /* Let firmware take over control of h/w */
1379         ctrl_ext = rd32(E1000_CTRL_EXT);
1380         wr32(E1000_CTRL_EXT,
1381                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1382 }
1383
1384 /**
1385  * igb_get_hw_control - get control of the h/w from f/w
1386  * @adapter: address of board private structure
1387  *
1388  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1389  * For ASF and Pass Through versions of f/w this means that
1390  * the driver is loaded.
1391  *
1392  **/
1393 static void igb_get_hw_control(struct igb_adapter *adapter)
1394 {
1395         struct e1000_hw *hw = &adapter->hw;
1396         u32 ctrl_ext;
1397
1398         /* Let firmware know the driver has taken over */
1399         ctrl_ext = rd32(E1000_CTRL_EXT);
1400         wr32(E1000_CTRL_EXT,
1401                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1402 }
1403
1404 /**
1405  * igb_configure - configure the hardware for RX and TX
1406  * @adapter: private board structure
1407  **/
1408 static void igb_configure(struct igb_adapter *adapter)
1409 {
1410         struct net_device *netdev = adapter->netdev;
1411         int i;
1412
1413         igb_get_hw_control(adapter);
1414         igb_set_rx_mode(netdev);
1415
1416         igb_restore_vlan(adapter);
1417
1418         igb_setup_tctl(adapter);
1419         igb_setup_mrqc(adapter);
1420         igb_setup_rctl(adapter);
1421
1422         igb_configure_tx(adapter);
1423         igb_configure_rx(adapter);
1424
1425         igb_rx_fifo_flush_82575(&adapter->hw);
1426
1427         /* call igb_desc_unused which always leaves
1428          * at least 1 descriptor unused to make sure
1429          * next_to_use != next_to_clean */
1430         for (i = 0; i < adapter->num_rx_queues; i++) {
1431                 struct igb_ring *ring = adapter->rx_ring[i];
1432                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1433         }
1434 }
1435
1436 /**
1437  * igb_power_up_link - Power up the phy/serdes link
1438  * @adapter: address of board private structure
1439  **/
1440 void igb_power_up_link(struct igb_adapter *adapter)
1441 {
1442         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1443                 igb_power_up_phy_copper(&adapter->hw);
1444         else
1445                 igb_power_up_serdes_link_82575(&adapter->hw);
1446 }
1447
1448 /**
1449  * igb_power_down_link - Power down the phy/serdes link
1450  * @adapter: address of board private structure
1451  */
1452 static void igb_power_down_link(struct igb_adapter *adapter)
1453 {
1454         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1455                 igb_power_down_phy_copper_82575(&adapter->hw);
1456         else
1457                 igb_shutdown_serdes_link_82575(&adapter->hw);
1458 }
1459
1460 /**
1461  * igb_up - Open the interface and prepare it to handle traffic
1462  * @adapter: board private structure
1463  **/
1464 int igb_up(struct igb_adapter *adapter)
1465 {
1466         struct e1000_hw *hw = &adapter->hw;
1467         int i;
1468
1469         /* hardware has been reset, we need to reload some things */
1470         igb_configure(adapter);
1471
1472         clear_bit(__IGB_DOWN, &adapter->state);
1473
1474         for (i = 0; i < adapter->num_q_vectors; i++) {
1475                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1476                 napi_enable(&q_vector->napi);
1477         }
1478         if (adapter->msix_entries)
1479                 igb_configure_msix(adapter);
1480         else
1481                 igb_assign_vector(adapter->q_vector[0], 0);
1482
1483         /* Clear any pending interrupts. */
1484         rd32(E1000_ICR);
1485         igb_irq_enable(adapter);
1486
1487         /* notify VFs that reset has been completed */
1488         if (adapter->vfs_allocated_count) {
1489                 u32 reg_data = rd32(E1000_CTRL_EXT);
1490                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1491                 wr32(E1000_CTRL_EXT, reg_data);
1492         }
1493
1494         netif_tx_start_all_queues(adapter->netdev);
1495
1496         /* start the watchdog. */
1497         hw->mac.get_link_status = 1;
1498         schedule_work(&adapter->watchdog_task);
1499
1500         return 0;
1501 }
1502
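/**
 * igb_down - Close the interface and stop all traffic
 * @adapter: board private structure
 *
 * Disables receives and transmits in the hardware, stops NAPI and the
 * watchdog, records final statistics and resets the adapter unless the
 * PCI channel is offline.
 **/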
1503 void igb_down(struct igb_adapter *adapter)
1504 {
1505         struct net_device *netdev = adapter->netdev;
1506         struct e1000_hw *hw = &adapter->hw;
1507         u32 tctl, rctl;
1508         int i;
1509
1510         /* signal that we're down so the interrupt handler does not
1511          * reschedule our watchdog timer */
1512         set_bit(__IGB_DOWN, &adapter->state);
1513
1514         /* disable receives in the hardware */
1515         rctl = rd32(E1000_RCTL);
1516         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1517         /* flush and sleep below */
1518
1519         netif_tx_stop_all_queues(netdev);
1520
1521         /* disable transmits in the hardware */
1522         tctl = rd32(E1000_TCTL);
1523         tctl &= ~E1000_TCTL_EN;
1524         wr32(E1000_TCTL, tctl);
1525         /* flush both disables and wait for them to finish */
1526         wrfl();
1527         msleep(10);
1528
1529         for (i = 0; i < adapter->num_q_vectors; i++) {
1530                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1531                 napi_disable(&q_vector->napi);
1532         }
1533
1534         igb_irq_disable(adapter);
1535
1536         del_timer_sync(&adapter->watchdog_timer);
1537         del_timer_sync(&adapter->phy_info_timer);
1538
1539         netif_carrier_off(netdev);
1540
1541         /* record the stats before reset */
1542         spin_lock(&adapter->stats64_lock);
1543         igb_update_stats(adapter, &adapter->stats64);
1544         spin_unlock(&adapter->stats64_lock);
1545
1546         adapter->link_speed = 0;
1547         adapter->link_duplex = 0;
1548
1549         if (!pci_channel_offline(adapter->pdev))
1550                 igb_reset(adapter);
1551         igb_clean_all_tx_rings(adapter);
1552         igb_clean_all_rx_rings(adapter);
1553 #ifdef CONFIG_IGB_DCA
1554
1555         /* since we reset the hardware, DCA settings were cleared */
1556         igb_setup_dca(adapter);
1557 #endif
1558 }
1559
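/**
 * igb_reinit_locked - bring the interface down and back up
 * @adapter: board private structure
 *
 * Serializes against concurrent resets with the __IGB_RESETTING bit.
 **/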
1560 void igb_reinit_locked(struct igb_adapter *adapter)
1561 {
1562         WARN_ON(in_interrupt());
1563         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1564                 msleep(1);
1565         igb_down(adapter);
1566         igb_up(adapter);
1567         clear_bit(__IGB_RESETTING, &adapter->state);
1568 }
1569
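/**
 * igb_reset - reset the hardware into a known good state
 * @adapter: board private structure
 *
 * Repartitions the packet buffer between Rx and Tx, reprograms the flow
 * control watermarks, notifies any active VFs of the reset and then
 * resets and re-initializes the MAC.
 **/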
1570 void igb_reset(struct igb_adapter *adapter)
1571 {
1572         struct pci_dev *pdev = adapter->pdev;
1573         struct e1000_hw *hw = &adapter->hw;
1574         struct e1000_mac_info *mac = &hw->mac;
1575         struct e1000_fc_info *fc = &hw->fc;
1576         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1577         u16 hwm;
1578
1579         /* Repartition the PBA for MTUs greater than 9K.
1580          * CTRL.RST is required for the change to take effect.
1581          */
1582         switch (mac->type) {
1583         case e1000_i350:
1584         case e1000_82580:
1585                 pba = rd32(E1000_RXPBS);
1586                 pba = igb_rxpbs_adjust_82580(pba);
1587                 break;
1588         case e1000_82576:
1589                 pba = rd32(E1000_RXPBS);
1590                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1591                 break;
1592         case e1000_82575:
1593         default:
1594                 pba = E1000_PBA_34K;
1595                 break;
1596         }
1597
1598         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1599             (mac->type < e1000_82576)) {
1600                 /* adjust PBA for jumbo frames */
1601                 wr32(E1000_PBA, pba);
1602
1603                 /* To maintain wire speed transmits, the Tx FIFO should be
1604                  * large enough to accommodate two full transmit packets,
1605                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1606                  * the Rx FIFO should be large enough to accommodate at least
1607                  * one full receive packet and is similarly rounded up and
1608                  * expressed in KB. */
1609                 pba = rd32(E1000_PBA);
1610                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1611                 tx_space = pba >> 16;
1612                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1613                 pba &= 0xffff;
1614                 /* the Tx FIFO also stores 16 bytes of information about the Tx descriptor,
1615                  * but doesn't include the Ethernet FCS because hardware appends it */
1616                 min_tx_space = (adapter->max_frame_size +
1617                                 sizeof(union e1000_adv_tx_desc) -
1618                                 ETH_FCS_LEN) * 2;
1619                 min_tx_space = ALIGN(min_tx_space, 1024);
1620                 min_tx_space >>= 10;
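                /* e.g. a 9018 byte max frame (9000 byte jumbo MTU, assumed
                 * here purely for illustration) plus the 16 byte descriptor,
                 * doubled and rounded up to the next 1KB, asks for 18KB */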
1621                 /* software strips receive CRC, so leave room for it */
1622                 min_rx_space = adapter->max_frame_size;
1623                 min_rx_space = ALIGN(min_rx_space, 1024);
1624                 min_rx_space >>= 10;
1625
1626                 /* If current Tx allocation is less than the min Tx FIFO size,
1627                  * and the min Tx FIFO size is less than the current Rx FIFO
1628                  * allocation, take space away from current Rx allocation */
1629                 if (tx_space < min_tx_space &&
1630                     ((min_tx_space - tx_space) < pba)) {
1631                         pba = pba - (min_tx_space - tx_space);
1632
1633                         /* if short on rx space, rx wins and must trump tx
1634                          * adjustment */
1635                         if (pba < min_rx_space)
1636                                 pba = min_rx_space;
1637                 }
1638                 wr32(E1000_PBA, pba);
1639         }
1640
1641         /* flow control settings */
1642         /* The high water mark must be low enough to fit one full frame
1643          * (or the size used for early receive) above it in the Rx FIFO.
1644          * Set it to the lower of:
1645          * - 90% of the Rx FIFO size, or
1646          * - the full Rx FIFO size minus one full frame */
1647         hwm = min(((pba << 10) * 9 / 10),
1648                         ((pba << 10) - 2 * adapter->max_frame_size));
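        /* e.g. on an 82575 with the default 34KB Rx PBA and a 1518 byte
         * max frame (1500 byte MTU, assumed here for illustration) the 90%
         * limit is the smaller value, giving hwm = 31334 */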
1649
1650         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1651         fc->low_water = fc->high_water - 16;
1652         fc->pause_time = 0xFFFF;
1653         fc->send_xon = 1;
1654         fc->current_mode = fc->requested_mode;
1655
1656         /* disable receives and transmits for all VFs */
1657         if (adapter->vfs_allocated_count) {
1658                 int i;
1659                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1660                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1661
1662                 /* ping all the active vfs to let them know we are going down */
1663                 igb_ping_all_vfs(adapter);
1664
1665                 /* disable transmits and receives */
1666                 wr32(E1000_VFRE, 0);
1667                 wr32(E1000_VFTE, 0);
1668         }
1669
1670         /* Allow time for pending master requests to run */
1671         hw->mac.ops.reset_hw(hw);
1672         wr32(E1000_WUC, 0);
1673
1674         if (hw->mac.ops.init_hw(hw))
1675                 dev_err(&pdev->dev, "Hardware Error\n");
1676
1677         if (hw->mac.type == e1000_82580) {
1678                 u32 reg = rd32(E1000_PCIEMISC);
1679                 wr32(E1000_PCIEMISC,
1680                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1681         }
1682         if (!netif_running(adapter->netdev))
1683                 igb_power_down_link(adapter);
1684
1685         igb_update_mng_vlan(adapter);
1686
1687         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1688         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1689
1690         igb_get_phy_info(hw);
1691 }
1692
1693 static const struct net_device_ops igb_netdev_ops = {
1694         .ndo_open               = igb_open,
1695         .ndo_stop               = igb_close,
1696         .ndo_start_xmit         = igb_xmit_frame_adv,
1697         .ndo_get_stats64        = igb_get_stats64,
1698         .ndo_set_rx_mode        = igb_set_rx_mode,
1699         .ndo_set_multicast_list = igb_set_rx_mode,
1700         .ndo_set_mac_address    = igb_set_mac,
1701         .ndo_change_mtu         = igb_change_mtu,
1702         .ndo_do_ioctl           = igb_ioctl,
1703         .ndo_tx_timeout         = igb_tx_timeout,
1704         .ndo_validate_addr      = eth_validate_addr,
1705         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1706         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1707         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1708         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1709         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1710         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1711         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1712 #ifdef CONFIG_NET_POLL_CONTROLLER
1713         .ndo_poll_controller    = igb_netpoll,
1714 #endif
1715 };
1716
1717 /**
1718  * igb_probe - Device Initialization Routine
1719  * @pdev: PCI device information struct
1720  * @ent: entry in igb_pci_tbl
1721  *
1722  * Returns 0 on success, negative on failure
1723  *
1724  * igb_probe initializes an adapter identified by a pci_dev structure.
1725  * The OS initialization, configuring of the adapter private structure,
1726  * and a hardware reset occur.
1727  **/
1728 static int __devinit igb_probe(struct pci_dev *pdev,
1729                                const struct pci_device_id *ent)
1730 {
1731         struct net_device *netdev;
1732         struct igb_adapter *adapter;
1733         struct e1000_hw *hw;
1734         u16 eeprom_data = 0;
1735         s32 ret_val;
1736         static int global_quad_port_a; /* global quad port a indication */
1737         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1738         unsigned long mmio_start, mmio_len;
1739         int err, pci_using_dac;
1740         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1741         u8 part_str[E1000_PBANUM_LENGTH];
1742
1743         /* Catch broken hardware that put the wrong VF device ID in
1744          * the PCIe SR-IOV capability.
1745          */
1746         if (pdev->is_virtfn) {
1747                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1748                      pci_name(pdev), pdev->vendor, pdev->device);
1749                 return -EINVAL;
1750         }
1751
1752         err = pci_enable_device_mem(pdev);
1753         if (err)
1754                 return err;
1755
1756         pci_using_dac = 0;
1757         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1758         if (!err) {
1759                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1760                 if (!err)
1761                         pci_using_dac = 1;
1762         } else {
1763                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1764                 if (err) {
1765                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1766                         if (err) {
1767                                 dev_err(&pdev->dev, "No usable DMA "
1768                                         "configuration, aborting\n");
1769                                 goto err_dma;
1770                         }
1771                 }
1772         }
1773
1774         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1775                                            IORESOURCE_MEM),
1776                                            igb_driver_name);
1777         if (err)
1778                 goto err_pci_reg;
1779
1780         pci_enable_pcie_error_reporting(pdev);
1781
1782         pci_set_master(pdev);
1783         pci_save_state(pdev);
1784
1785         err = -ENOMEM;
1786         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1787                                    IGB_ABS_MAX_TX_QUEUES);
1788         if (!netdev)
1789                 goto err_alloc_etherdev;
1790
1791         SET_NETDEV_DEV(netdev, &pdev->dev);
1792
1793         pci_set_drvdata(pdev, netdev);
1794         adapter = netdev_priv(netdev);
1795         adapter->netdev = netdev;
1796         adapter->pdev = pdev;
1797         hw = &adapter->hw;
1798         hw->back = adapter;
1799         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1800
1801         mmio_start = pci_resource_start(pdev, 0);
1802         mmio_len = pci_resource_len(pdev, 0);
1803
1804         err = -EIO;
1805         hw->hw_addr = ioremap(mmio_start, mmio_len);
1806         if (!hw->hw_addr)
1807                 goto err_ioremap;
1808
1809         netdev->netdev_ops = &igb_netdev_ops;
1810         igb_set_ethtool_ops(netdev);
1811         netdev->watchdog_timeo = 5 * HZ;
1812
1813         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1814
1815         netdev->mem_start = mmio_start;
1816         netdev->mem_end = mmio_start + mmio_len;
1817
1818         /* PCI config space info */
1819         hw->vendor_id = pdev->vendor;
1820         hw->device_id = pdev->device;
1821         hw->revision_id = pdev->revision;
1822         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1823         hw->subsystem_device_id = pdev->subsystem_device;
1824
1825         /* Copy the default MAC, PHY and NVM function pointers */
1826         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1827         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1828         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1829         /* Initialize skew-specific constants */
1830         err = ei->get_invariants(hw);
1831         if (err)
1832                 goto err_sw_init;
1833
1834         /* setup the private structure */
1835         err = igb_sw_init(adapter);
1836         if (err)
1837                 goto err_sw_init;
1838
1839         igb_get_bus_info_pcie(hw);
1840
1841         hw->phy.autoneg_wait_to_complete = false;
1842
1843         /* Copper options */
1844         if (hw->phy.media_type == e1000_media_type_copper) {
1845                 hw->phy.mdix = AUTO_ALL_MODES;
1846                 hw->phy.disable_polarity_correction = false;
1847                 hw->phy.ms_type = e1000_ms_hw_default;
1848         }
1849
1850         if (igb_check_reset_block(hw))
1851                 dev_info(&pdev->dev,
1852                         "PHY reset is blocked due to SOL/IDER session.\n");
1853
1854         netdev->features = NETIF_F_SG |
1855                            NETIF_F_IP_CSUM |
1856                            NETIF_F_HW_VLAN_TX |
1857                            NETIF_F_HW_VLAN_RX |
1858                            NETIF_F_HW_VLAN_FILTER;
1859
1860         netdev->features |= NETIF_F_IPV6_CSUM;
1861         netdev->features |= NETIF_F_TSO;
1862         netdev->features |= NETIF_F_TSO6;
1863         netdev->features |= NETIF_F_GRO;
1864
1865         netdev->vlan_features |= NETIF_F_TSO;
1866         netdev->vlan_features |= NETIF_F_TSO6;
1867         netdev->vlan_features |= NETIF_F_IP_CSUM;
1868         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1869         netdev->vlan_features |= NETIF_F_SG;
1870
1871         if (pci_using_dac) {
1872                 netdev->features |= NETIF_F_HIGHDMA;
1873                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1874         }
1875
1876         if (hw->mac.type >= e1000_82576)
1877                 netdev->features |= NETIF_F_SCTP_CSUM;
1878
1879         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1880
1881         /* before reading the NVM, reset the controller to put the device in a
1882          * known good starting state */
1883         hw->mac.ops.reset_hw(hw);
1884
1885         /* make sure the NVM is good */
1886         if (igb_validate_nvm_checksum(hw) < 0) {
1887                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1888                 err = -EIO;
1889                 goto err_eeprom;
1890         }
1891
1892         /* copy the MAC address out of the NVM */
1893         if (hw->mac.ops.read_mac_addr(hw))
1894                 dev_err(&pdev->dev, "NVM Read Error\n");
1895
1896         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1897         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1898
1899         if (!is_valid_ether_addr(netdev->perm_addr)) {
1900                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1901                 err = -EIO;
1902                 goto err_eeprom;
1903         }
1904
1905         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1906                     (unsigned long) adapter);
1907         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1908                     (unsigned long) adapter);
1909
1910         INIT_WORK(&adapter->reset_task, igb_reset_task);
1911         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1912
1913         /* Initialize link properties that are user-changeable */
1914         adapter->fc_autoneg = true;
1915         hw->mac.autoneg = true;
1916         hw->phy.autoneg_advertised = 0x2f;
1917
1918         hw->fc.requested_mode = e1000_fc_default;
1919         hw->fc.current_mode = e1000_fc_default;
1920
1921         igb_validate_mdi_setting(hw);
1922
1923         /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
1924          * enable the ACPI Magic Packet filter
1925          */
1926
1927         if (hw->bus.func == 0)
1928                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1929         else if (hw->mac.type == e1000_82580)
1930                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1931                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1932                                  &eeprom_data);
1933         else if (hw->bus.func == 1)
1934                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1935
1936         if (eeprom_data & eeprom_apme_mask)
1937                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1938
1939         /* now that we have the eeprom settings, apply the special cases where
1940          * the eeprom may be wrong or the board simply won't support wake on
1941          * lan on a particular port */
1942         switch (pdev->device) {
1943         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1944                 adapter->eeprom_wol = 0;
1945                 break;
1946         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1947         case E1000_DEV_ID_82576_FIBER:
1948         case E1000_DEV_ID_82576_SERDES:
1949                 /* Wake events only supported on port A for dual fiber
1950                  * regardless of eeprom setting */
1951                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1952                         adapter->eeprom_wol = 0;
1953                 break;
1954         case E1000_DEV_ID_82576_QUAD_COPPER:
1955         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1956                 /* if quad port adapter, disable WoL on all but port A */
1957                 if (global_quad_port_a != 0)
1958                         adapter->eeprom_wol = 0;
1959                 else
1960                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1961                 /* Reset for multiple quad port adapters */
1962                 if (++global_quad_port_a == 4)
1963                         global_quad_port_a = 0;
1964                 break;
1965         }
1966
1967         /* initialize the wol settings based on the eeprom settings */
1968         adapter->wol = adapter->eeprom_wol;
1969         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1970
1971         /* reset the hardware with the new settings */
1972         igb_reset(adapter);
1973
1974         /* let the f/w know that the h/w is now under the control of the
1975          * driver. */
1976         igb_get_hw_control(adapter);
1977
1978         strcpy(netdev->name, "eth%d");
1979         err = register_netdev(netdev);
1980         if (err)
1981                 goto err_register;
1982
1983         /* carrier off reporting is important to ethtool even BEFORE open */
1984         netif_carrier_off(netdev);
1985
1986 #ifdef CONFIG_IGB_DCA
1987         if (dca_add_requester(&pdev->dev) == 0) {
1988                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1989                 dev_info(&pdev->dev, "DCA enabled\n");
1990                 igb_setup_dca(adapter);
1991         }
1992
1993 #endif
1994         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1995         /* print bus type/speed/width info */
1996         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1997                  netdev->name,
1998                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1999                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2000                                                             "unknown"),
2001                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2002                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2003                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2004                    "unknown"),
2005                  netdev->dev_addr);
2006
2007         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2008         if (ret_val)
2009                 strcpy(part_str, "Unknown");
2010         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2011         dev_info(&pdev->dev,
2012                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2013                 adapter->msix_entries ? "MSI-X" :
2014                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2015                 adapter->num_rx_queues, adapter->num_tx_queues);
2016
2017         return 0;
2018
2019 err_register:
2020         igb_release_hw_control(adapter);
2021 err_eeprom:
2022         if (!igb_check_reset_block(hw))
2023                 igb_reset_phy(hw);
2024
2025         if (hw->flash_address)
2026                 iounmap(hw->flash_address);
2027 err_sw_init:
2028         igb_clear_interrupt_scheme(adapter);
2029         iounmap(hw->hw_addr);
2030 err_ioremap:
2031         free_netdev(netdev);
2032 err_alloc_etherdev:
2033         pci_release_selected_regions(pdev,
2034                                      pci_select_bars(pdev, IORESOURCE_MEM));
2035 err_pci_reg:
2036 err_dma:
2037         pci_disable_device(pdev);
2038         return err;
2039 }
2040
2041 /**
2042  * igb_remove - Device Removal Routine
2043  * @pdev: PCI device information struct
2044  *
2045  * igb_remove is called by the PCI subsystem to alert the driver
2046  * that it should release a PCI device.  This could be caused by a
2047  * Hot-Plug event, or because the driver is going to be removed from
2048  * memory.
2049  **/
2050 static void __devexit igb_remove(struct pci_dev *pdev)
2051 {
2052         struct net_device *netdev = pci_get_drvdata(pdev);
2053         struct igb_adapter *adapter = netdev_priv(netdev);
2054         struct e1000_hw *hw = &adapter->hw;
2055
2056         /*
2057          * The watchdog task may reschedule the watchdog timer, so mark
2058          * the adapter as down first to keep it from being rescheduled.
2059          */
2060         set_bit(__IGB_DOWN, &adapter->state);
2061         del_timer_sync(&adapter->watchdog_timer);
2062         del_timer_sync(&adapter->phy_info_timer);
2063
2064         cancel_work_sync(&adapter->reset_task);
2065         cancel_work_sync(&adapter->watchdog_task);
2066
2067 #ifdef CONFIG_IGB_DCA
2068         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2069                 dev_info(&pdev->dev, "DCA disabled\n");
2070                 dca_remove_requester(&pdev->dev);
2071                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2072                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2073         }
2074 #endif
2075
2076         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2077          * would have already happened in close and is redundant. */
2078         igb_release_hw_control(adapter);
2079
2080         unregister_netdev(netdev);
2081
2082         igb_clear_interrupt_scheme(adapter);
2083
2084 #ifdef CONFIG_PCI_IOV
2085         /* reclaim resources allocated to VFs */
2086         if (adapter->vf_data) {
2087                 /* disable iov and allow time for transactions to clear */
2088                 pci_disable_sriov(pdev);
2089                 msleep(500);
2090
2091                 kfree(adapter->vf_data);
2092                 adapter->vf_data = NULL;
2093                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2094                 msleep(100);
2095                 dev_info(&pdev->dev, "IOV Disabled\n");
2096         }
2097 #endif
2098
2099         iounmap(hw->hw_addr);
2100         if (hw->flash_address)
2101                 iounmap(hw->flash_address);
2102         pci_release_selected_regions(pdev,
2103                                      pci_select_bars(pdev, IORESOURCE_MEM));
2104
2105         free_netdev(netdev);
2106
2107         pci_disable_pcie_error_reporting(pdev);
2108
2109         pci_disable_device(pdev);
2110 }
2111
2112 /**
2113  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2114  * @adapter: board private structure to initialize
2115  *
2116  * This function initializes the vf specific data storage and then attempts to
2117  * allocate the VFs.  The reason for this ordering is that it is much
2118  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2119  * the memory for the VFs.
2120  **/
2121 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2122 {
2123 #ifdef CONFIG_PCI_IOV
2124         struct pci_dev *pdev = adapter->pdev;
2125
2126         if (adapter->vfs_allocated_count) {
2127                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2128                                            sizeof(struct vf_data_storage),
2129                                            GFP_KERNEL);
2130                 /* if allocation failed then we do not support SR-IOV */
2131                 if (!adapter->vf_data) {
2132                         adapter->vfs_allocated_count = 0;
2133                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2134                                 "Data Storage\n");
2135                 }
2136         }
2137
2138         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2139                 kfree(adapter->vf_data);
2140                 adapter->vf_data = NULL;
2141 #endif /* CONFIG_PCI_IOV */
2142                 adapter->vfs_allocated_count = 0;
2143 #ifdef CONFIG_PCI_IOV
2144         } else {
2145                 unsigned char mac_addr[ETH_ALEN];
2146                 int i;
2147                 dev_info(&pdev->dev, "%d vfs allocated\n",
2148                          adapter->vfs_allocated_count);
2149                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2150                         random_ether_addr(mac_addr);
2151                         igb_set_vf_mac(adapter, i, mac_addr);
2152                 }
2153         }
2154 #endif /* CONFIG_PCI_IOV */
2155 }
2156
2158 /**
2159  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2160  * @adapter: board private structure to initialize
2161  *
2162  * igb_init_hw_timer initializes the function pointer and values for the hw
2163  * timer found in hardware.
2164  **/
2165 static void igb_init_hw_timer(struct igb_adapter *adapter)
2166 {
2167         struct e1000_hw *hw = &adapter->hw;
2168
2169         switch (hw->mac.type) {
2170         case e1000_i350:
2171         case e1000_82580:
2172                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2173                 adapter->cycles.read = igb_read_clock;
2174                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2175                 adapter->cycles.mult = 1;
2176                 /*
2177                  * The 82580 timesync updates the system timer by 8ns every 8ns
2178                  * and the value cannot be shifted.  Instead we need to shift
2179                  * the registers to generate a 64bit timer value.  As a result
2180                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2181                  * 24 in order to generate a larger value for synchronization.
2182                  */
2183                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2184                 /* disable system timer temporarily by setting bit 31 */
2185                 wr32(E1000_TSAUXC, 0x80000000);
2186                 wrfl();
2187
2188                 /* Set registers so that rollover occurs soon to test this. */
2189                 wr32(E1000_SYSTIMR, 0x00000000);
2190                 wr32(E1000_SYSTIML, 0x80000000);
2191                 wr32(E1000_SYSTIMH, 0x000000FF);
2192                 wrfl();
2193
2194                 /* enable system timer by clearing bit 31 */
2195                 wr32(E1000_TSAUXC, 0x0);
2196                 wrfl();
2197
2198                 timecounter_init(&adapter->clock,
2199                                  &adapter->cycles,
2200                                  ktime_to_ns(ktime_get_real()));
2201                 /*
2202                  * Synchronize our NIC clock against system wall clock. NIC
2203                  * time stamp reading requires ~3us per sample; each sample
2204                  * was pretty stable even under load, so only 10 samples are
2205                  * required for each offset comparison.
2206                  */
2207                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2208                 adapter->compare.source = &adapter->clock;
2209                 adapter->compare.target = ktime_get_real;
2210                 adapter->compare.num_samples = 10;
2211                 timecompare_update(&adapter->compare, 0);
2212                 break;
2213         case e1000_82576:
2214                 /*
2215                  * Initialize hardware timer: we keep it running just in case
2216                  * some program needs it later on.
2217                  */
2218                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2219                 adapter->cycles.read = igb_read_clock;
2220                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2221                 adapter->cycles.mult = 1;
2222                 /*
2223                  * Scale the NIC clock cycle by a large factor so that
2224                  * relatively small clock corrections can be added or
2225                  * subtracted at each clock tick. The drawbacks of a large
2226                  * factor are a) that the clock register overflows more quickly
2227                  * (not such a big deal) and b) that the increment per tick has
2228                  * to fit into 24 bits.  As a result we need to use a shift of
2229                  * 19 so we can fit a value of 16 into the TIMINCA register.
2230                  */
2231                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2232                 wr32(E1000_TIMINCA,
2233                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2234                                 (16 << IGB_82576_TSYNC_SHIFT));
2235
2236                 /* Set registers so that rollover occurs soon to test this. */
2237                 wr32(E1000_SYSTIML, 0x00000000);
2238                 wr32(E1000_SYSTIMH, 0xFF800000);
2239                 wrfl();
2240
2241                 timecounter_init(&adapter->clock,
2242                                  &adapter->cycles,
2243                                  ktime_to_ns(ktime_get_real()));
2244                 /*
2245                  * Synchronize our NIC clock against system wall clock. NIC
2246                  * time stamp reading requires ~3us per sample; each sample
2247                  * was pretty stable even under load, so only 10 samples are
2248                  * required for each offset comparison.
2249                  */
2250                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2251                 adapter->compare.source = &adapter->clock;
2252                 adapter->compare.target = ktime_get_real;
2253                 adapter->compare.num_samples = 10;
2254                 timecompare_update(&adapter->compare, 0);
2255                 break;
2256         case e1000_82575:
2257                 /* 82575 does not support timesync */
2258         default:
2259                 break;
2260         }
2261
2262 }
2263
2264 /**
2265  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2266  * @adapter: board private structure to initialize
2267  *
2268  * igb_sw_init initializes the Adapter private data structure.
2269  * Fields are initialized based on PCI device information and
2270  * OS network device settings (MTU size).
2271  **/
2272 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2273 {
2274         struct e1000_hw *hw = &adapter->hw;
2275         struct net_device *netdev = adapter->netdev;
2276         struct pci_dev *pdev = adapter->pdev;
2277
2278         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2279
2280         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2281         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2282         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2283         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2284
2285         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2286         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2287
2288         spin_lock_init(&adapter->stats64_lock);
2289 #ifdef CONFIG_PCI_IOV
2290         if (hw->mac.type == e1000_82576)
2291                 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2292
2293 #endif /* CONFIG_PCI_IOV */
2294         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2295
2296         /*
2297          * if rss_queues > 4, or more than 6 VFs are allocated while more
2298          * than one RSS queue is in use, combine the queues into queue
2299          * pairs in order to conserve the limited supply of interrupt vectors
2300          */
2301         if ((adapter->rss_queues > 4) ||
2302             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2303                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2304
2305         /* This call may decrease the number of queues */
2306         if (igb_init_interrupt_scheme(adapter)) {
2307                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2308                 return -ENOMEM;
2309         }
2310
2311         igb_init_hw_timer(adapter);
2312         igb_probe_vfs(adapter);
2313
2314         /* Explicitly disable IRQ since the NIC can be in any state. */
2315         igb_irq_disable(adapter);
2316
2317         set_bit(__IGB_DOWN, &adapter->state);
2318         return 0;
2319 }
2320
2321 /**
2322  * igb_open - Called when a network interface is made active
2323  * @netdev: network interface device structure
2324  *
2325  * Returns 0 on success, negative value on failure
2326  *
2327  * The open entry point is called when a network interface is made
2328  * active by the system (IFF_UP).  At this point all resources needed
2329  * for transmit and receive operations are allocated, the interrupt
2330  * handler is registered with the OS, the watchdog timer is started,
2331  * and the stack is notified that the interface is ready.
2332  **/
2333 static int igb_open(struct net_device *netdev)
2334 {
2335         struct igb_adapter *adapter = netdev_priv(netdev);
2336         struct e1000_hw *hw = &adapter->hw;
2337         int err;
2338         int i;
2339
2340         /* disallow open during test */
2341         if (test_bit(__IGB_TESTING, &adapter->state))
2342                 return -EBUSY;
2343
2344         netif_carrier_off(netdev);
2345
2346         /* allocate transmit descriptors */
2347         err = igb_setup_all_tx_resources(adapter);
2348         if (err)
2349                 goto err_setup_tx;
2350
2351         /* allocate receive descriptors */
2352         err = igb_setup_all_rx_resources(adapter);
2353         if (err)
2354                 goto err_setup_rx;
2355
2356         igb_power_up_link(adapter);
2357
2358         /* before we allocate an interrupt, we must be ready to handle it.
2359          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2360          * as soon as we call request_irq, so we have to set up our
2361          * clean_rx handler before we do so.  */
2362         igb_configure(adapter);
2363
2364         err = igb_request_irq(adapter);
2365         if (err)
2366                 goto err_req_irq;
2367
2368         /* From here on the code is the same as igb_up() */
2369         clear_bit(__IGB_DOWN, &adapter->state);
2370
2371         for (i = 0; i < adapter->num_q_vectors; i++) {
2372                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2373                 napi_enable(&q_vector->napi);
2374         }
2375
2376         /* Clear any pending interrupts. */
2377         rd32(E1000_ICR);
2378
2379         igb_irq_enable(adapter);
2380
2381         /* notify VFs that reset has been completed */
2382         if (adapter->vfs_allocated_count) {
2383                 u32 reg_data = rd32(E1000_CTRL_EXT);
2384                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2385                 wr32(E1000_CTRL_EXT, reg_data);
2386         }
2387
2388         netif_tx_start_all_queues(netdev);
2389
2390         /* start the watchdog. */
2391         hw->mac.get_link_status = 1;
2392         schedule_work(&adapter->watchdog_task);
2393
2394         return 0;
2395
2396 err_req_irq:
2397         igb_release_hw_control(adapter);
2398         igb_power_down_link(adapter);
2399         igb_free_all_rx_resources(adapter);
2400 err_setup_rx:
2401         igb_free_all_tx_resources(adapter);
2402 err_setup_tx:
2403         igb_reset(adapter);
2404
2405         return err;
2406 }
2407
2408 /**
2409  * igb_close - Disables a network interface
2410  * @netdev: network interface device structure
2411  *
2412  * Returns 0, this is not allowed to fail
2413  *
2414  * The close entry point is called when an interface is de-activated
2415  * by the OS.  The hardware is still under the driver's control, but
2416  * needs to be disabled.  A global MAC reset is issued to stop the
2417  * hardware, and all transmit and receive resources are freed.
2418  **/
2419 static int igb_close(struct net_device *netdev)
2420 {
2421         struct igb_adapter *adapter = netdev_priv(netdev);
2422
2423         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2424         igb_down(adapter);
2425
2426         igb_free_irq(adapter);
2427
2428         igb_free_all_tx_resources(adapter);
2429         igb_free_all_rx_resources(adapter);
2430
2431         return 0;
2432 }
2433
2434 /**
2435  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2436  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2437  *
2438  * Return 0 on success, negative on failure
2439  **/
2440 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2441 {
2442         struct device *dev = tx_ring->dev;
2443         int size;
2444
2445         size = sizeof(struct igb_buffer) * tx_ring->count;
2446         tx_ring->buffer_info = vzalloc(size);
2447         if (!tx_ring->buffer_info)
2448                 goto err;
2449
2450         /* round up to nearest 4K */
2451         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2452         tx_ring->size = ALIGN(tx_ring->size, 4096);
2453
2454         tx_ring->desc = dma_alloc_coherent(dev,
2455                                            tx_ring->size,
2456                                            &tx_ring->dma,
2457                                            GFP_KERNEL);
2458
2459         if (!tx_ring->desc)
2460                 goto err;
2461
2462         tx_ring->next_to_use = 0;
2463         tx_ring->next_to_clean = 0;
2464         return 0;
2465
2466 err:
2467         vfree(tx_ring->buffer_info);
2468         dev_err(dev,
2469                 "Unable to allocate memory for the transmit descriptor ring\n");
2470         return -ENOMEM;
2471 }
2472
2473 /**
2474  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2475  *                                (Descriptors) for all queues
2476  * @adapter: board private structure
2477  *
2478  * Return 0 on success, negative on failure
2479  **/
2480 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2481 {
2482         struct pci_dev *pdev = adapter->pdev;
2483         int i, err = 0;
2484
2485         for (i = 0; i < adapter->num_tx_queues; i++) {
2486                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2487                 if (err) {
2488                         dev_err(&pdev->dev,
2489                                 "Allocation for Tx Queue %u failed\n", i);
2490                         for (i--; i >= 0; i--)
2491                                 igb_free_tx_resources(adapter->tx_ring[i]);
2492                         break;
2493                 }
2494         }
2495
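        /* map every possible Tx queue index onto one of the allocated rings
         * (round-robin) so the transmit path always gets a valid ring */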
2496         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2497                 int r_idx = i % adapter->num_tx_queues;
2498                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2499         }
2500         return err;
2501 }
2502
2503 /**
2504  * igb_setup_tctl - configure the transmit control registers
2505  * @adapter: Board private structure
2506  **/
2507 void igb_setup_tctl(struct igb_adapter *adapter)
2508 {
2509         struct e1000_hw *hw = &adapter->hw;
2510         u32 tctl;
2511
2512         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2513         wr32(E1000_TXDCTL(0), 0);
2514
2515         /* Program the Transmit Control Register */
2516         tctl = rd32(E1000_TCTL);
2517         tctl &= ~E1000_TCTL_CT;
2518         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2519                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2520
2521         igb_config_collision_dist(hw);
2522
2523         /* Enable transmits */
2524         tctl |= E1000_TCTL_EN;
2525
2526         wr32(E1000_TCTL, tctl);
2527 }
2528
2529 /**
2530  * igb_configure_tx_ring - Configure transmit ring after Reset
2531  * @adapter: board private structure
2532  * @ring: tx ring to configure
2533  *
2534  * Configure a transmit ring after a reset.
2535  **/
2536 void igb_configure_tx_ring(struct igb_adapter *adapter,
2537                            struct igb_ring *ring)
2538 {
2539         struct e1000_hw *hw = &adapter->hw;
2540         u32 txdctl;
2541         u64 tdba = ring->dma;
2542         int reg_idx = ring->reg_idx;
2543
2544         /* disable the queue */
2545         txdctl = rd32(E1000_TXDCTL(reg_idx));
2546         wr32(E1000_TXDCTL(reg_idx),
2547                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2548         wrfl();
2549         mdelay(10);
2550
2551         wr32(E1000_TDLEN(reg_idx),
2552                         ring->count * sizeof(union e1000_adv_tx_desc));
2553         wr32(E1000_TDBAL(reg_idx),
2554                         tdba & 0x00000000ffffffffULL);
2555         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2556
2557         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2558         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2559         writel(0, ring->head);
2560         writel(0, ring->tail);
2561
2562         txdctl |= IGB_TX_PTHRESH;
2563         txdctl |= IGB_TX_HTHRESH << 8;
2564         txdctl |= IGB_TX_WTHRESH << 16;
2565
2566         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2567         wr32(E1000_TXDCTL(reg_idx), txdctl);
2568 }
2569
2570 /**
2571  * igb_configure_tx - Configure transmit Unit after Reset
2572  * @adapter: board private structure
2573  *
2574  * Configure the Tx unit of the MAC after a reset.
2575  **/
2576 static void igb_configure_tx(struct igb_adapter *adapter)
2577 {
2578         int i;
2579
2580         for (i = 0; i < adapter->num_tx_queues; i++)
2581                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2582 }
2583
2584 /**
2585  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2586  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2587  *
2588  * Returns 0 on success, negative on failure
2589  **/
2590 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2591 {
2592         struct device *dev = rx_ring->dev;
2593         int size, desc_len;
2594
2595         size = sizeof(struct igb_buffer) * rx_ring->count;
2596         rx_ring->buffer_info = vzalloc(size);
2597         if (!rx_ring->buffer_info)
2598                 goto err;
2599
2600         desc_len = sizeof(union e1000_adv_rx_desc);
2601
2602         /* Round up to nearest 4K */
2603         rx_ring->size = rx_ring->count * desc_len;
2604         rx_ring->size = ALIGN(rx_ring->size, 4096);
2605
2606         rx_ring->desc = dma_alloc_coherent(dev,
2607                                            rx_ring->size,
2608                                            &rx_ring->dma,
2609                                            GFP_KERNEL);
2610
2611         if (!rx_ring->desc)
2612                 goto err;
2613
2614         rx_ring->next_to_clean = 0;
2615         rx_ring->next_to_use = 0;
2616
2617         return 0;
2618
2619 err:
2620         vfree(rx_ring->buffer_info);
2621         rx_ring->buffer_info = NULL;
2622         dev_err(dev,
2623                 "Unable to allocate memory for the receive descriptor ring\n");
2624         return -ENOMEM;
2625 }
2626
2627 /**
2628  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2629  *                                (Descriptors) for all queues
2630  * @adapter: board private structure
2631  *
2632  * Return 0 on success, negative on failure
2633  **/
2634 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2635 {
2636         struct pci_dev *pdev = adapter->pdev;
2637         int i, err = 0;
2638
2639         for (i = 0; i < adapter->num_rx_queues; i++) {
2640                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2641                 if (err) {
2642                         dev_err(&pdev->dev,
2643                                 "Allocation for Rx Queue %u failed\n", i);
2644                         for (i--; i >= 0; i--)
2645                                 igb_free_rx_resources(adapter->rx_ring[i]);
2646                         break;
2647                 }
2648         }
2649
2650         return err;
2651 }
2652
2653 /**
2654  * igb_setup_mrqc - configure the multiple receive queue control registers
2655  * @adapter: Board private structure
2656  **/
2657 static void igb_setup_mrqc(struct igb_adapter *adapter)
2658 {
2659         struct e1000_hw *hw = &adapter->hw;
2660         u32 mrqc, rxcsum;
2661         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2662         union e1000_reta {
2663                 u32 dword;
2664                 u8  bytes[4];
2665         } reta;
2666         static const u8 rsshash[40] = {
2667                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2668                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2669                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2670                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2671
2672         /* Fill out hash function seeds */
2673         for (j = 0; j < 10; j++) {
2674                 u32 rsskey = rsshash[(j * 4)];
2675                 rsskey |= rsshash[(j * 4) + 1] << 8;
2676                 rsskey |= rsshash[(j * 4) + 2] << 16;
2677                 rsskey |= rsshash[(j * 4) + 3] << 24;
2678                 array_wr32(E1000_RSSRK(0), j, rsskey);
2679         }
2680
2681         num_rx_queues = adapter->rss_queues;
2682
2683         if (adapter->vfs_allocated_count) {
2684                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2685                 switch (hw->mac.type) {
2686                 case e1000_i350:
2687                 case e1000_82580:
2688                         num_rx_queues = 1;
2689                         shift = 0;
2690                         break;
2691                 case e1000_82576:
2692                         shift = 3;
2693                         num_rx_queues = 2;
2694                         break;
2695                 case e1000_82575:
2696                         shift = 2;
2697                         shift2 = 6;
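                        /* fall through */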
2698                 default:
2699                         break;
2700                 }
2701         } else {
2702                 if (hw->mac.type == e1000_82575)
2703                         shift = 6;
2704         }
2705
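        /*
         * Populate the 128 entry redirection table four bytes at a time,
         * spreading flows round-robin across the available Rx queues; the
         * shift values place the queue index where the MAC in use expects it.
         */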
2706         for (j = 0; j < (32 * 4); j++) {
2707                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2708                 if (shift2)
2709                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2710                 if ((j & 3) == 3)
2711                         wr32(E1000_RETA(j >> 2), reta.dword);
2712         }
2713
2714         /*
2715          * Disable raw packet checksumming so that RSS hash is placed in
2716          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2717          * offloads as they are enabled by default
2718          */
2719         rxcsum = rd32(E1000_RXCSUM);
2720         rxcsum |= E1000_RXCSUM_PCSD;
2721
2722         if (adapter->hw.mac.type >= e1000_82576)
2723                 /* Enable Receive Checksum Offload for SCTP */
2724                 rxcsum |= E1000_RXCSUM_CRCOFL;
2725
2726         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2727         wr32(E1000_RXCSUM, rxcsum);
2728
2729         /* If VMDq is enabled then we set the appropriate mode for that, else
2730          * we default to RSS so that an RSS hash is calculated per packet even
2731          * if we are only using one queue */
2732         if (adapter->vfs_allocated_count) {
2733                 if (hw->mac.type > e1000_82575) {
2734                         /* Set the default pool for the PF's first queue */
2735                         u32 vtctl = rd32(E1000_VT_CTL);
2736                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2737                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2738                         vtctl |= adapter->vfs_allocated_count <<
2739                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2740                         wr32(E1000_VT_CTL, vtctl);
2741                 }
2742                 if (adapter->rss_queues > 1)
2743                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2744                 else
2745                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2746         } else {
2747                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2748         }
2749         igb_vmm_control(adapter);
2750
2751         /*
2752          * Generate RSS hash based on TCP port numbers and/or
2753          * IPv4/v6 src and dst addresses since UDP cannot be
2754          * hashed reliably due to IP fragmentation
2755          */
2756         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2757                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2758                 E1000_MRQC_RSS_FIELD_IPV6 |
2759                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2760                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2761
2762         wr32(E1000_MRQC, mrqc);
2763 }
2764
2765 /**
2766  * igb_setup_rctl - configure the receive control registers
2767  * @adapter: board private structure
2768  **/
2769 void igb_setup_rctl(struct igb_adapter *adapter)
2770 {
2771         struct e1000_hw *hw = &adapter->hw;
2772         u32 rctl;
2773
2774         rctl = rd32(E1000_RCTL);
2775
2776         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2777         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2778
2779         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2780                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2781
2782         /*
2783          * enable stripping of CRC. It's unlikely this will break BMC
2784          * redirection as it did with e1000. Newer features require
2785          * that the HW strips the CRC.
2786          */
2787         rctl |= E1000_RCTL_SECRC;
2788
2789         /* disable store bad packets and clear size bits. */
2790         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2791
2792         /* enable LPE to prevent packets larger than max_frame_size */
2793         rctl |= E1000_RCTL_LPE;
2794
2795         /* disable queue 0 to prevent tail write w/o re-config */
2796         wr32(E1000_RXDCTL(0), 0);
2797
2798         /* Attention!!!  For SR-IOV PF driver operations you must enable
2799          * queue drop for all VF and PF queues to prevent head-of-line blocking
2800          * if an untrusted VF does not provide descriptors to hardware.
2801          */
2802         if (adapter->vfs_allocated_count) {
2803                 /* set all queue drop enable bits */
2804                 wr32(E1000_QDE, ALL_QUEUES);
2805         }
2806
2807         wr32(E1000_RCTL, rctl);
2808 }
2809
2810 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2811                                    int vfn)
2812 {
2813         struct e1000_hw *hw = &adapter->hw;
2814         u32 vmolr;
2815
2816         /* if it isn't the PF, check to see if VFs are enabled and
2817          * increase the size to support VLAN tags */
2818         if (vfn < adapter->vfs_allocated_count &&
2819             adapter->vf_data[vfn].vlans_enabled)
2820                 size += VLAN_TAG_SIZE;
2821
2822         vmolr = rd32(E1000_VMOLR(vfn));
2823         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2824         vmolr |= size | E1000_VMOLR_LPE;
2825         wr32(E1000_VMOLR(vfn), vmolr);
2826
2827         return 0;
2828 }
2829
2830 /**
2831  * igb_rlpml_set - set maximum receive packet size
2832  * @adapter: board private structure
2833  *
2834  * Configure maximum receivable packet size.
2835  **/
2836 static void igb_rlpml_set(struct igb_adapter *adapter)
2837 {
2838         u32 max_frame_size = adapter->max_frame_size;
2839         struct e1000_hw *hw = &adapter->hw;
2840         u16 pf_id = adapter->vfs_allocated_count;
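        /* the PF's pool index comes immediately after the last allocated VF */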
2841
2842         if (adapter->vlgrp)
2843                 max_frame_size += VLAN_TAG_SIZE;
2844
2845         /* if VFs are enabled, we set RLPML to the largest possible request
2846          * size and set the VMOLR RLPML to the size we need */
2847         if (pf_id) {
2848                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2849                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2850         }
2851
2852         wr32(E1000_RLPML, max_frame_size);
2853 }
2854
2855 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2856                                  int vfn, bool aupe)
2857 {
2858         struct e1000_hw *hw = &adapter->hw;
2859         u32 vmolr;
2860
2861         /*
2862          * This register exists only on 82576 and newer, so on older MACs
2863          * we should exit and do nothing
2864          */
2865         if (hw->mac.type < e1000_82576)
2866                 return;
2867
2868         vmolr = rd32(E1000_VMOLR(vfn));
2869         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2870         if (aupe)
2871                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2872         else
2873                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2874
2875         /* clear all bits that might not be set */
2876         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2877
2878         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2879                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2880         /*
2881          * for VMDq only allow the VFs and the PF default pool to accept
2882          * broadcast and multicast packets
2883          */
2884         if (vfn <= adapter->vfs_allocated_count)
2885                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2886
2887         wr32(E1000_VMOLR(vfn), vmolr);
2888 }
2889
2890 /**
2891  * igb_configure_rx_ring - Configure a receive ring after Reset
2892  * @adapter: board private structure
2893  * @ring: receive ring to be configured
2894  *
2895  * Configure the Rx unit of the MAC after a reset.
2896  **/
2897 void igb_configure_rx_ring(struct igb_adapter *adapter,
2898                            struct igb_ring *ring)
2899 {
2900         struct e1000_hw *hw = &adapter->hw;
2901         u64 rdba = ring->dma;
2902         int reg_idx = ring->reg_idx;
2903         u32 srrctl, rxdctl;
2904
2905         /* disable the queue */
2906         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2907         wr32(E1000_RXDCTL(reg_idx),
2908                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2909
2910         /* Set DMA base address registers */
2911         wr32(E1000_RDBAL(reg_idx),
2912              rdba & 0x00000000ffffffffULL);
2913         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2914         wr32(E1000_RDLEN(reg_idx),
2915                        ring->count * sizeof(union e1000_adv_rx_desc));
2916
2917         /* initialize head and tail */
2918         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2919         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2920         writel(0, ring->head);
2921         writel(0, ring->tail);
2922
2923         /* set descriptor configuration */
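        /*
         * Buffers smaller than 1K use always-split descriptors: packet
         * headers land in the small rx_buffer_len buffer while payload
         * goes to a half-page buffer; larger buffers use a single
         * advanced one-buffer descriptor.
         */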
2924         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2925                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2926                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2927 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2928                 srrctl |= IGB_RXBUFFER_16384 >>
2929                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2930 #else
2931                 srrctl |= (PAGE_SIZE / 2) >>
2932                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2933 #endif
2934                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2935         } else {
2936                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2937                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2938                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2939         }
2940         if (hw->mac.type == e1000_82580)
2941                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2942         /* Only set Drop Enable if we are supporting multiple queues */
2943         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2944                 srrctl |= E1000_SRRCTL_DROP_EN;
2945
2946         wr32(E1000_SRRCTL(reg_idx), srrctl);
2947
2948         /* set filtering for VMDQ pools */
2949         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2950
2951         /* enable receive descriptor fetching */
2952         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2953         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2954         rxdctl &= 0xFFF00000;
2955         rxdctl |= IGB_RX_PTHRESH;
2956         rxdctl |= IGB_RX_HTHRESH << 8;
2957         rxdctl |= IGB_RX_WTHRESH << 16;
2958         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2959 }
2960
2961 /**
2962  * igb_configure_rx - Configure receive Unit after Reset
2963  * @adapter: board private structure
2964  *
2965  * Configure the Rx unit of the MAC after a reset.
2966  **/
2967 static void igb_configure_rx(struct igb_adapter *adapter)
2968 {
2969         int i;
2970
2971         /* set UTA to appropriate mode */
2972         igb_set_uta(adapter);
2973
2974         /* set the correct pool for the PF default MAC address in entry 0 */
2975         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2976                          adapter->vfs_allocated_count);
2977
2978         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2979          * the Base and Length of the Rx Descriptor Ring */
2980         for (i = 0; i < adapter->num_rx_queues; i++)
2981                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2982 }
2983
2984 /**
2985  * igb_free_tx_resources - Free Tx Resources per Queue
2986  * @tx_ring: Tx descriptor ring for a specific queue
2987  *
2988  * Free all transmit software resources
2989  **/
2990 void igb_free_tx_resources(struct igb_ring *tx_ring)
2991 {
2992         igb_clean_tx_ring(tx_ring);
2993
2994         vfree(tx_ring->buffer_info);
2995         tx_ring->buffer_info = NULL;
2996
2997         /* if not set, then don't free */
2998         if (!tx_ring->desc)
2999                 return;
3000
3001         dma_free_coherent(tx_ring->dev, tx_ring->size,
3002                           tx_ring->desc, tx_ring->dma);
3003
3004         tx_ring->desc = NULL;
3005 }
3006
3007 /**
3008  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3009  * @adapter: board private structure
3010  *
3011  * Free all transmit software resources
3012  **/
3013 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3014 {
3015         int i;
3016
3017         for (i = 0; i < adapter->num_tx_queues; i++)
3018                 igb_free_tx_resources(adapter->tx_ring[i]);
3019 }
3020
3021 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3022                                     struct igb_buffer *buffer_info)
3023 {
3024         if (buffer_info->dma) {
3025                 if (buffer_info->mapped_as_page)
3026                         dma_unmap_page(tx_ring->dev,
3027                                         buffer_info->dma,
3028                                         buffer_info->length,
3029                                         DMA_TO_DEVICE);
3030                 else
3031                         dma_unmap_single(tx_ring->dev,
3032                                         buffer_info->dma,
3033                                         buffer_info->length,
3034                                         DMA_TO_DEVICE);
3035                 buffer_info->dma = 0;
3036         }
3037         if (buffer_info->skb) {
3038                 dev_kfree_skb_any(buffer_info->skb);
3039                 buffer_info->skb = NULL;
3040         }
3041         buffer_info->time_stamp = 0;
3042         buffer_info->length = 0;
3043         buffer_info->next_to_watch = 0;
3044         buffer_info->mapped_as_page = false;
3045 }
3046
3047 /**
3048  * igb_clean_tx_ring - Free Tx Buffers
3049  * @tx_ring: ring to be cleaned
3050  **/
3051 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3052 {
3053         struct igb_buffer *buffer_info;
3054         unsigned long size;
3055         unsigned int i;
3056
3057         if (!tx_ring->buffer_info)
3058                 return;
3059         /* Free all the Tx ring sk_buffs */
3060
3061         for (i = 0; i < tx_ring->count; i++) {
3062                 buffer_info = &tx_ring->buffer_info[i];
3063                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3064         }
3065
3066         size = sizeof(struct igb_buffer) * tx_ring->count;
3067         memset(tx_ring->buffer_info, 0, size);
3068
3069         /* Zero out the descriptor ring */
3070         memset(tx_ring->desc, 0, tx_ring->size);
3071
3072         tx_ring->next_to_use = 0;
3073         tx_ring->next_to_clean = 0;
3074 }
3075
3076 /**
3077  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3078  * @adapter: board private structure
3079  **/
3080 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3081 {
3082         int i;
3083
3084         for (i = 0; i < adapter->num_tx_queues; i++)
3085                 igb_clean_tx_ring(adapter->tx_ring[i]);
3086 }
3087
3088 /**
3089  * igb_free_rx_resources - Free Rx Resources
3090  * @rx_ring: ring to clean the resources from
3091  *
3092  * Free all receive software resources
3093  **/
3094 void igb_free_rx_resources(struct igb_ring *rx_ring)
3095 {
3096         igb_clean_rx_ring(rx_ring);
3097
3098         vfree(rx_ring->buffer_info);
3099         rx_ring->buffer_info = NULL;
3100
3101         /* if not set, then don't free */
3102         if (!rx_ring->desc)
3103                 return;
3104
3105         dma_free_coherent(rx_ring->dev, rx_ring->size,
3106                           rx_ring->desc, rx_ring->dma);
3107
3108         rx_ring->desc = NULL;
3109 }
3110
3111 /**
3112  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3113  * @adapter: board private structure
3114  *
3115  * Free all receive software resources
3116  **/
3117 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3118 {
3119         int i;
3120
3121         for (i = 0; i < adapter->num_rx_queues; i++)
3122                 igb_free_rx_resources(adapter->rx_ring[i]);
3123 }
3124
3125 /**
3126  * igb_clean_rx_ring - Free Rx Buffers per Queue
3127  * @rx_ring: ring to free buffers from
3128  **/
3129 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3130 {
3131         struct igb_buffer *buffer_info;
3132         unsigned long size;
3133         unsigned int i;
3134
3135         if (!rx_ring->buffer_info)
3136                 return;
3137
3138         /* Free all the Rx ring sk_buffs */
3139         for (i = 0; i < rx_ring->count; i++) {
3140                 buffer_info = &rx_ring->buffer_info[i];
3141                 if (buffer_info->dma) {
3142                         dma_unmap_single(rx_ring->dev,
3143                                          buffer_info->dma,
3144                                          rx_ring->rx_buffer_len,
3145                                          DMA_FROM_DEVICE);
3146                         buffer_info->dma = 0;
3147                 }
3148
3149                 if (buffer_info->skb) {
3150                         dev_kfree_skb(buffer_info->skb);
3151                         buffer_info->skb = NULL;
3152                 }
3153                 if (buffer_info->page_dma) {
3154                         dma_unmap_page(rx_ring->dev,
3155                                        buffer_info->page_dma,
3156                                        PAGE_SIZE / 2,
3157                                        DMA_FROM_DEVICE);
3158                         buffer_info->page_dma = 0;
3159                 }
3160                 if (buffer_info->page) {
3161                         put_page(buffer_info->page);
3162                         buffer_info->page = NULL;
3163                         buffer_info->page_offset = 0;
3164                 }
3165         }
3166
3167         size = sizeof(struct igb_buffer) * rx_ring->count;
3168         memset(rx_ring->buffer_info, 0, size);
3169
3170         /* Zero out the descriptor ring */
3171         memset(rx_ring->desc, 0, rx_ring->size);
3172
3173         rx_ring->next_to_clean = 0;
3174         rx_ring->next_to_use = 0;
3175 }
3176
3177 /**
3178  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3179  * @adapter: board private structure
3180  **/
3181 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3182 {
3183         int i;
3184
3185         for (i = 0; i < adapter->num_rx_queues; i++)
3186                 igb_clean_rx_ring(adapter->rx_ring[i]);
3187 }
3188
3189 /**
3190  * igb_set_mac - Change the Ethernet Address of the NIC
3191  * @netdev: network interface device structure
3192  * @p: pointer to an address structure
3193  *
3194  * Returns 0 on success, negative on failure
3195  **/
3196 static int igb_set_mac(struct net_device *netdev, void *p)
3197 {
3198         struct igb_adapter *adapter = netdev_priv(netdev);
3199         struct e1000_hw *hw = &adapter->hw;
3200         struct sockaddr *addr = p;
3201
3202         if (!is_valid_ether_addr(addr->sa_data))
3203                 return -EADDRNOTAVAIL;
3204
3205         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3206         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3207
3208         /* set the correct pool for the new PF MAC address in entry 0 */
3209         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3210                          adapter->vfs_allocated_count);
3211
3212         return 0;
3213 }
3214
3215 /**
3216  * igb_write_mc_addr_list - write multicast addresses to MTA
3217  * @netdev: network interface device structure
3218  *
3219  * Writes multicast address list to the MTA hash table.
3220  * Returns: -ENOMEM on failure
3221  *                0 on no addresses written
3222  *                X on writing X addresses to MTA
3223  **/
3224 static int igb_write_mc_addr_list(struct net_device *netdev)
3225 {
3226         struct igb_adapter *adapter = netdev_priv(netdev);
3227         struct e1000_hw *hw = &adapter->hw;
3228         struct netdev_hw_addr *ha;
3229         u8  *mta_list;
3230         int i;
3231
3232         if (netdev_mc_empty(netdev)) {
3233                 /* nothing to program, so clear mc list */
3234                 igb_update_mc_addr_list(hw, NULL, 0);
3235                 igb_restore_vf_multicasts(adapter);
3236                 return 0;
3237         }
3238
3239         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3240         if (!mta_list)
3241                 return -ENOMEM;
3242
3243         /* The shared function expects a packed array of only addresses. */
3244         i = 0;
3245         netdev_for_each_mc_addr(ha, netdev)
3246                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3247
3248         igb_update_mc_addr_list(hw, mta_list, i);
3249         kfree(mta_list);
3250
3251         return netdev_mc_count(netdev);
3252 }
3253
3254 /**
3255  * igb_write_uc_addr_list - write unicast addresses to RAR table
3256  * @netdev: network interface device structure
3257  *
3258  * Writes unicast address list to the RAR table.
3259  * Returns: -ENOMEM on failure/insufficient address space
3260  *                0 on no addresses written
3261  *                X on writing X addresses to the RAR table
3262  **/
3263 static int igb_write_uc_addr_list(struct net_device *netdev)
3264 {
3265         struct igb_adapter *adapter = netdev_priv(netdev);
3266         struct e1000_hw *hw = &adapter->hw;
3267         unsigned int vfn = adapter->vfs_allocated_count;
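        /* entry 0 holds the PF default MAC and one RAR entry is reserved per
         * VF; the remaining entries can hold secondary unicast addresses */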
3268         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3269         int count = 0;
3270
3271         /* return ENOMEM indicating insufficient memory for addresses */
3272         if (netdev_uc_count(netdev) > rar_entries)
3273                 return -ENOMEM;
3274
3275         if (!netdev_uc_empty(netdev) && rar_entries) {
3276                 struct netdev_hw_addr *ha;
3277
3278                 netdev_for_each_uc_addr(ha, netdev) {
3279                         if (!rar_entries)
3280                                 break;
3281                         igb_rar_set_qsel(adapter, ha->addr,
3282                                          rar_entries--,
3283                                          vfn);
3284                         count++;
3285                 }
3286         }
3287         /* zero out the unused RAR entries in reverse order to avoid write combining */
3288         for (; rar_entries > 0 ; rar_entries--) {
3289                 wr32(E1000_RAH(rar_entries), 0);
3290                 wr32(E1000_RAL(rar_entries), 0);
3291         }
3292         wrfl();
3293
3294         return count;
3295 }
3296
3297 /**
3298  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3299  * @netdev: network interface device structure
3300  *
3301  * The set_rx_mode entry point is called whenever the unicast or multicast
3302  * address lists or the network interface flags are updated.  This routine is
3303  * responsible for configuring the hardware for proper unicast, multicast,
3304  * promiscuous mode, and all-multi behavior.
3305  **/
3306 static void igb_set_rx_mode(struct net_device *netdev)
3307 {
3308         struct igb_adapter *adapter = netdev_priv(netdev);
3309         struct e1000_hw *hw = &adapter->hw;
3310         unsigned int vfn = adapter->vfs_allocated_count;
3311         u32 rctl, vmolr = 0;
3312         int count;
3313
3314         /* Check for Promiscuous and All Multicast modes */
3315         rctl = rd32(E1000_RCTL);
3316
3317         /* clear the affected bits */
3318         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3319
3320         if (netdev->flags & IFF_PROMISC) {
3321                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3322                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3323         } else {
3324                 if (netdev->flags & IFF_ALLMULTI) {
3325                         rctl |= E1000_RCTL_MPE;
3326                         vmolr |= E1000_VMOLR_MPME;
3327                 } else {
3328                         /*
3329                          * Write addresses to the MTA; if the attempt fails
3330                          * then we should just turn on promiscuous mode so
3331                          * that we can at least receive multicast traffic
3332                          */
3333                         count = igb_write_mc_addr_list(netdev);
3334                         if (count < 0) {
3335                                 rctl |= E1000_RCTL_MPE;
3336                                 vmolr |= E1000_VMOLR_MPME;
3337                         } else if (count) {
3338                                 vmolr |= E1000_VMOLR_ROMPE;
3339                         }
3340                 }
3341                 /*
3342                  * Write addresses to available RAR registers; if there is not
3343                  * sufficient space to store all the addresses then enable
3344                  * unicast promiscuous mode
3345                  */
3346                 count = igb_write_uc_addr_list(netdev);
3347                 if (count < 0) {
3348                         rctl |= E1000_RCTL_UPE;
3349                         vmolr |= E1000_VMOLR_ROPE;
3350                 }
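                /* not promiscuous, so enable hardware VLAN filtering */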
3351                 rctl |= E1000_RCTL_VFE;
3352         }
3353         wr32(E1000_RCTL, rctl);
3354
3355         /*
3356          * In order to support SR-IOV and eventually VMDq it is necessary to set
3357          * the VMOLR to enable the appropriate modes.  Without this workaround
3358          * we will have issues with VLAN tag stripping not being done for frames
3359          * that are only arriving because we are the default pool
3360          */
3361         if (hw->mac.type < e1000_82576)
3362                 return;
3363
3364         vmolr |= rd32(E1000_VMOLR(vfn)) &
3365                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3366         wr32(E1000_VMOLR(vfn), vmolr);
3367         igb_restore_vf_multicasts(adapter);
3368 }
3369
3370 static void igb_check_wvbr(struct igb_adapter *adapter)
3371 {
3372         struct e1000_hw *hw = &adapter->hw;
3373         u32 wvbr = 0;
3374
3375         switch (hw->mac.type) {
3376         case e1000_82576:
3377         case e1000_i350:
3378                 if (!(wvbr = rd32(E1000_WVBR)))
3379                         return;
3380                 break;
3381         default:
3382                 break;
3383         }
3384
3385         adapter->wvbr |= wvbr;
3386 }
3387
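/* spoof events for VF n are reported in WVBR bits n and n + IGB_STAGGERED_QUEUE_OFFSET */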
3388 #define IGB_STAGGERED_QUEUE_OFFSET 8
3389
3390 static void igb_spoof_check(struct igb_adapter *adapter)
3391 {
3392         int j;
3393
3394         if (!adapter->wvbr)
3395                 return;
3396
3397         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3398                 if (adapter->wvbr & (1 << j) ||
3399                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3400                         dev_warn(&adapter->pdev->dev,
3401                                 "Spoof event(s) detected on VF %d\n", j);
3402                         adapter->wvbr &=
3403                                 ~((1 << j) |
3404                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3405                 }
3406         }
3407 }
3408
3409 /* Need to wait a few seconds after link up to get diagnostic information from
3410  * the phy */
3411 static void igb_update_phy_info(unsigned long data)
3412 {
3413         struct igb_adapter *adapter = (struct igb_adapter *) data;
3414         igb_get_phy_info(&adapter->hw);
3415 }
3416
3417 /**
3418  * igb_has_link - check shared code for link and determine up/down
3419  * @adapter: pointer to driver private info
3420  **/
3421 bool igb_has_link(struct igb_adapter *adapter)
3422 {
3423         struct e1000_hw *hw = &adapter->hw;
3424         bool link_active = false;
3425         s32 ret_val = 0;
3426
3427         /* get_link_status is set on LSC (link status) interrupt or
3428          * rx sequence error interrupt.  get_link_status will stay
3429          * true until the e1000_check_for_link establishes link
3430          * for copper adapters ONLY
3431          */
3432         switch (hw->phy.media_type) {
3433         case e1000_media_type_copper:
3434                 if (hw->mac.get_link_status) {
3435                         ret_val = hw->mac.ops.check_for_link(hw);
3436                         link_active = !hw->mac.get_link_status;
3437                 } else {
3438                         link_active = true;
3439                 }
3440                 break;
3441         case e1000_media_type_internal_serdes:
3442                 ret_val = hw->mac.ops.check_for_link(hw);
3443                 link_active = hw->mac.serdes_has_link;
3444                 break;
3445         default:
3446         case e1000_media_type_unknown:
3447                 break;
3448         }
3449
3450         return link_active;
3451 }
3452
3453 /**
3454  * igb_watchdog - Timer Call-back
3455  * @data: pointer to adapter cast into an unsigned long
3456  **/
3457 static void igb_watchdog(unsigned long data)
3458 {
3459         struct igb_adapter *adapter = (struct igb_adapter *)data;
3460         /* Do the rest outside of interrupt context */
3461         schedule_work(&adapter->watchdog_task);
3462 }
3463
3464 static void igb_watchdog_task(struct work_struct *work)
3465 {
3466         struct igb_adapter *adapter = container_of(work,
3467                                                    struct igb_adapter,
3468                                                    watchdog_task);
3469         struct e1000_hw *hw = &adapter->hw;
3470         struct net_device *netdev = adapter->netdev;
3471         u32 link;
3472         int i;
3473
3474         link = igb_has_link(adapter);
3475         if (link) {
3476                 if (!netif_carrier_ok(netdev)) {
3477                         u32 ctrl;
3478                         hw->mac.ops.get_speed_and_duplex(hw,
3479                                                          &adapter->link_speed,
3480                                                          &adapter->link_duplex);
3481
3482                         ctrl = rd32(E1000_CTRL);
3483                         /* Link status message must follow this format */
3484                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3485                                  "Flow Control: %s\n",
3486                                netdev->name,
3487                                adapter->link_speed,
3488                                adapter->link_duplex == FULL_DUPLEX ?
3489                                  "Full Duplex" : "Half Duplex",
3490                                ((ctrl & E1000_CTRL_TFCE) &&
3491                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3492                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3493                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3494
3495                         /* adjust timeout factor according to speed/duplex */
3496                         adapter->tx_timeout_factor = 1;
3497                         switch (adapter->link_speed) {
3498                         case SPEED_10:
3499                                 adapter->tx_timeout_factor = 14;
3500                                 break;
3501                         case SPEED_100:
3502                                 /* maybe add some timeout factor ? */
3503                                 break;
3504                         }
3505
3506                         netif_carrier_on(netdev);
3507
3508                         igb_ping_all_vfs(adapter);
3509
3510                         /* link state has changed, schedule phy info update */
3511                         if (!test_bit(__IGB_DOWN, &adapter->state))
3512                                 mod_timer(&adapter->phy_info_timer,
3513                                           round_jiffies(jiffies + 2 * HZ));
3514                 }
3515         } else {
3516                 if (netif_carrier_ok(netdev)) {
3517                         adapter->link_speed = 0;
3518                         adapter->link_duplex = 0;
3519                         /* Link status message must follow this format */
3520                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3521                                netdev->name);
3522                         netif_carrier_off(netdev);
3523
3524                         igb_ping_all_vfs(adapter);
3525
3526                         /* link state has changed, schedule phy info update */
3527                         if (!test_bit(__IGB_DOWN, &adapter->state))
3528                                 mod_timer(&adapter->phy_info_timer,
3529                                           round_jiffies(jiffies + 2 * HZ));
3530                 }
3531         }
3532
3533         spin_lock(&adapter->stats64_lock);
3534         igb_update_stats(adapter, &adapter->stats64);
3535         spin_unlock(&adapter->stats64_lock);
3536
3537         for (i = 0; i < adapter->num_tx_queues; i++) {
3538                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3539                 if (!netif_carrier_ok(netdev)) {
3540                         /* We've lost link, so the controller stops DMA,
3541                          * but we've got queued Tx work that's never going
3542                          * to get done, so reset controller to flush Tx.
3543                          * (Do the reset outside of interrupt context). */
3544                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3545                                 adapter->tx_timeout_count++;
3546                                 schedule_work(&adapter->reset_task);
3547                                 /* return immediately since reset is imminent */
3548                                 return;
3549                         }
3550                 }
3551
3552                 /* Force detection of hung controller every watchdog period */
3553                 tx_ring->detect_tx_hung = true;
3554         }
3555
3556         /* Cause software interrupt to ensure rx ring is cleaned */
3557         if (adapter->msix_entries) {
3558                 u32 eics = 0;
3559                 for (i = 0; i < adapter->num_q_vectors; i++) {
3560                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3561                         eics |= q_vector->eims_value;
3562                 }
3563                 wr32(E1000_EICS, eics);
3564         } else {
3565                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3566         }
3567
3568         igb_spoof_check(adapter);
3569
3570         /* Reset the timer */
3571         if (!test_bit(__IGB_DOWN, &adapter->state))
3572                 mod_timer(&adapter->watchdog_timer,
3573                           round_jiffies(jiffies + 2 * HZ));
3574 }
3575
3576 enum latency_range {
3577         lowest_latency = 0,
3578         low_latency = 1,
3579         bulk_latency = 2,
3580         latency_invalid = 255
3581 };
3582
3583 /**
3584  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3585  *
3586  *      Stores a new ITR value based strictly on packet size.  This
3587  *      algorithm is less sophisticated than that used in igb_update_itr,
3588  *      due to the difficulty of synchronizing statistics across multiple
3589  *      receive rings.  The divisors and thresholds used by this function
3590  *      were determined based on theoretical maximum wire speed and testing
3591  *      data, in order to minimize response time while increasing bulk
3592  *      throughput.
3593  *      This functionality is controlled by the InterruptThrottleRate module
3594  *      parameter (see igb_param.c)
3595  *      NOTE:  This function is called only when operating in a multiqueue
3596  *             receive environment.
3597  * @q_vector: pointer to q_vector
3598  **/
3599 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3600 {
3601         int new_val = q_vector->itr_val;
3602         int avg_wire_size = 0;
3603         struct igb_adapter *adapter = q_vector->adapter;
3604         struct igb_ring *ring;
3605         unsigned int packets;
3606
3607         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3608          * ints/sec - ITR timer value of 976 (~250 usec).
3609          */
3610         if (adapter->link_speed != SPEED_1000) {
3611                 new_val = 976;
3612                 goto set_itr_val;
3613         }
3614
3615         ring = q_vector->rx_ring;
3616         if (ring) {
3617                 packets = ACCESS_ONCE(ring->total_packets);
3618
3619                 if (packets)
3620                         avg_wire_size = ring->total_bytes / packets;
3621         }
3622
3623         ring = q_vector->tx_ring;
3624         if (ring) {
3625                 packets = ACCESS_ONCE(ring->total_packets);
3626
3627                 if (packets)
3628                         avg_wire_size = max_t(u32, avg_wire_size,
3629                                               ring->total_bytes / packets);
3630         }
3631
3632         /* if avg_wire_size isn't set no work was done */
3633         if (!avg_wire_size)
3634                 goto clear_counts;
3635
3636         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3637         avg_wire_size += 24;
3638
3639         /* Don't starve jumbo frames */
3640         avg_wire_size = min(avg_wire_size, 3000);
3641
3642         /* Give a little boost to mid-size frames */
3643         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3644                 new_val = avg_wire_size / 3;
3645         else
3646                 new_val = avg_wire_size / 2;
3647
3648         /* when in itr mode 3 do not exceed 20K ints/sec */
3649         if (adapter->rx_itr_setting == 3 && new_val < 196)
3650                 new_val = 196;
3651
3652 set_itr_val:
3653         if (new_val != q_vector->itr_val) {
3654                 q_vector->itr_val = new_val;
3655                 q_vector->set_itr = 1;
3656         }
3657 clear_counts:
3658         if (q_vector->rx_ring) {
3659                 q_vector->rx_ring->total_bytes = 0;
3660                 q_vector->rx_ring->total_packets = 0;
3661         }
3662         if (q_vector->tx_ring) {
3663                 q_vector->tx_ring->total_bytes = 0;
3664                 q_vector->tx_ring->total_packets = 0;
3665         }
3666 }
3667
3668 /**
3669  * igb_update_itr - update the dynamic ITR value based on statistics
3670  *      Stores a new ITR value based on packets and byte
3671  *      counts during the last interrupt.  The advantage of per interrupt
3672  *      computation is faster updates and more accurate ITR for the current
3673  *      traffic pattern.  Constants in this function were computed
3674  *      based on theoretical maximum wire speed and thresholds were set based
3675  *      on testing data as well as attempting to minimize response time
3676  *      while increasing bulk throughput.
3677  *      This functionality is controlled by the InterruptThrottleRate module
3678  *      parameter (see igb_param.c)
3679  *      NOTE:  These calculations are only valid when operating in a single-
3680  *             queue environment.
3681  * @adapter: pointer to adapter
3682  * @itr_setting: current q_vector->itr_val
3683  * @packets: the number of packets during this measurement interval
3684  * @bytes: the number of bytes during this measurement interval
3685  **/
3686 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3687                                    int packets, int bytes)
3688 {
3689         unsigned int retval = itr_setting;
3690
3691         if (packets == 0)
3692                 goto update_itr_done;
3693
3694         switch (itr_setting) {
3695         case lowest_latency:
3696                 /* handle TSO and jumbo frames */
3697                 if (bytes/packets > 8000)
3698                         retval = bulk_latency;
3699                 else if ((packets < 5) && (bytes > 512))
3700                         retval = low_latency;
3701                 break;
3702         case low_latency:  /* 50 usec aka 20000 ints/s */
3703                 if (bytes > 10000) {
3704                         /* this if handles the TSO accounting */
3705                         if (bytes/packets > 8000) {
3706                                 retval = bulk_latency;
3707                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3708                                 retval = bulk_latency;
3709                         } else if (packets > 35) {
3710                                 retval = lowest_latency;
3711                         }
3712                 } else if (bytes/packets > 2000) {
3713                         retval = bulk_latency;
3714                 } else if (packets <= 2 && bytes < 512) {
3715                         retval = lowest_latency;
3716                 }
3717                 break;
3718         case bulk_latency: /* 250 usec aka 4000 ints/s */
3719                 if (bytes > 25000) {
3720                         if (packets > 35)
3721                                 retval = low_latency;
3722                 } else if (bytes < 1500) {
3723                         retval = low_latency;
3724                 }
3725                 break;
3726         }
3727
3728 update_itr_done:
3729         return retval;
3730 }
3731
3732 static void igb_set_itr(struct igb_adapter *adapter)
3733 {
3734         struct igb_q_vector *q_vector = adapter->q_vector[0];
3735         u16 current_itr;
3736         u32 new_itr = q_vector->itr_val;
3737
3738         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3739         if (adapter->link_speed != SPEED_1000) {
3740                 current_itr = 0;
3741                 new_itr = 4000;
3742                 goto set_itr_now;
3743         }
3744
3745         adapter->rx_itr = igb_update_itr(adapter,
3746                                     adapter->rx_itr,
3747                                     q_vector->rx_ring->total_packets,
3748                                     q_vector->rx_ring->total_bytes);
3749
3750         adapter->tx_itr = igb_update_itr(adapter,
3751                                     adapter->tx_itr,
3752                                     q_vector->tx_ring->total_packets,
3753                                     q_vector->tx_ring->total_bytes);
3754         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3755
3756         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3757         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3758                 current_itr = low_latency;
3759
3760         switch (current_itr) {
3761         /* counts and packets in update_itr are dependent on these numbers */
3762         case lowest_latency:
3763                 new_itr = 56;  /* aka 70,000 ints/sec */
3764                 break;
3765         case low_latency:
3766                 new_itr = 196; /* aka 20,000 ints/sec */
3767                 break;
3768         case bulk_latency:
3769                 new_itr = 980; /* aka 4,000 ints/sec */
3770                 break;
3771         default:
3772                 break;
3773         }
3774
3775 set_itr_now:
3776         q_vector->rx_ring->total_bytes = 0;
3777         q_vector->rx_ring->total_packets = 0;
3778         q_vector->tx_ring->total_bytes = 0;
3779         q_vector->tx_ring->total_packets = 0;
3780
3781         if (new_itr != q_vector->itr_val) {
3782                 /* this attempts to bias the interrupt rate towards Bulk
3783                  * by adding intermediate steps when interrupt rate is
3784                  * increasing */
3785                 new_itr = new_itr > q_vector->itr_val ?
3786                              max((new_itr * q_vector->itr_val) /
3787                                  (new_itr + (q_vector->itr_val >> 2)),
3788                                  new_itr) :
3789                              new_itr;
3790                 /* Don't write the value here; it resets the adapter's
3791                  * internal timer, and causes us to delay far longer than
3792                  * we should between interrupts.  Instead, we write the ITR
3793                  * value at the beginning of the next interrupt so the timing
3794                  * ends up being correct.
3795                  */
3796                 q_vector->itr_val = new_itr;
3797                 q_vector->set_itr = 1;
3798         }
3799 }
3800
3801 #define IGB_TX_FLAGS_CSUM               0x00000001
3802 #define IGB_TX_FLAGS_VLAN               0x00000002
3803 #define IGB_TX_FLAGS_TSO                0x00000004
3804 #define IGB_TX_FLAGS_IPV4               0x00000008
3805 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3806 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3807 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3808
3809 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3810                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3811 {
3812         struct e1000_adv_tx_context_desc *context_desc;
3813         unsigned int i;
3814         int err;
3815         struct igb_buffer *buffer_info;
3816         u32 info = 0, tu_cmd = 0;
3817         u32 mss_l4len_idx;
3818         u8 l4len;
3819
3820         if (skb_header_cloned(skb)) {
3821                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3822                 if (err)
3823                         return err;
3824         }
3825
3826         l4len = tcp_hdrlen(skb);
3827         *hdr_len += l4len;
3828
3829         if (skb->protocol == htons(ETH_P_IP)) {
3830                 struct iphdr *iph = ip_hdr(skb);
3831                 iph->tot_len = 0;
3832                 iph->check = 0;
3833                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3834                                                          iph->daddr, 0,
3835                                                          IPPROTO_TCP,
3836                                                          0);
3837         } else if (skb_is_gso_v6(skb)) {
3838                 ipv6_hdr(skb)->payload_len = 0;
3839                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3840                                                        &ipv6_hdr(skb)->daddr,
3841                                                        0, IPPROTO_TCP, 0);
3842         }
3843
3844         i = tx_ring->next_to_use;
3845
3846         buffer_info = &tx_ring->buffer_info[i];
3847         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3848         /* VLAN MACLEN IPLEN */
3849         if (tx_flags & IGB_TX_FLAGS_VLAN)
3850                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3851         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3852         *hdr_len += skb_network_offset(skb);
3853         info |= skb_network_header_len(skb);
3854         *hdr_len += skb_network_header_len(skb);
3855         context_desc->vlan_macip_lens = cpu_to_le32(info);
3856
3857         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3858         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3859
3860         if (skb->protocol == htons(ETH_P_IP))
3861                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3862         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3863
3864         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3865
3866         /* MSS L4LEN IDX */
3867         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3868         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3869
3870         /* For 82575, context index must be unique per ring. */
3871         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3872                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3873
3874         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3875         context_desc->seqnum_seed = 0;
3876
3877         buffer_info->time_stamp = jiffies;
3878         buffer_info->next_to_watch = i;
3879         buffer_info->dma = 0;
3880         i++;
3881         if (i == tx_ring->count)
3882                 i = 0;
3883
3884         tx_ring->next_to_use = i;
3885
3886         return true;
3887 }
3888
3889 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3890                                    struct sk_buff *skb, u32 tx_flags)
3891 {
3892         struct e1000_adv_tx_context_desc *context_desc;
3893         struct device *dev = tx_ring->dev;
3894         struct igb_buffer *buffer_info;
3895         u32 info = 0, tu_cmd = 0;
3896         unsigned int i;
3897
3898         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3899             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3900                 i = tx_ring->next_to_use;
3901                 buffer_info = &tx_ring->buffer_info[i];
3902                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3903
3904                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3905                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3906
3907                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3908                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3909                         info |= skb_network_header_len(skb);
3910
3911                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3912
3913                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3914
3915                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3916                         __be16 protocol;
3917
3918                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3919                                 const struct vlan_ethhdr *vhdr =
3920                                           (const struct vlan_ethhdr*)skb->data;
3921
3922                                 protocol = vhdr->h_vlan_encapsulated_proto;
3923                         } else {
3924                                 protocol = skb->protocol;
3925                         }
3926
3927                         switch (protocol) {
3928                         case cpu_to_be16(ETH_P_IP):
3929                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3930                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3931                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3932                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3933                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3934                                 break;
3935                         case cpu_to_be16(ETH_P_IPV6):
3936                                 /* XXX what about other V6 headers?? */
3937                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3938                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3939                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3940                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3941                                 break;
3942                         default:
3943                                 if (unlikely(net_ratelimit()))
3944                                         dev_warn(dev,
3945                                             "partial checksum but proto=%x!\n",
3946                                             skb->protocol);
3947                                 break;
3948                         }
3949                 }
3950
3951                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3952                 context_desc->seqnum_seed = 0;
3953                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3954                         context_desc->mss_l4len_idx =
3955                                 cpu_to_le32(tx_ring->reg_idx << 4);
3956
3957                 buffer_info->time_stamp = jiffies;
3958                 buffer_info->next_to_watch = i;
3959                 buffer_info->dma = 0;
3960
3961                 i++;
3962                 if (i == tx_ring->count)
3963                         i = 0;
3964                 tx_ring->next_to_use = i;
3965
3966                 return true;
3967         }
3968         return false;
3969 }
3970
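/* each advanced Tx data descriptor can carry at most 64KB of data */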
3971 #define IGB_MAX_TXD_PWR 16
3972 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3973
3974 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3975                                  unsigned int first)
3976 {
3977         struct igb_buffer *buffer_info;
3978         struct device *dev = tx_ring->dev;
3979         unsigned int hlen = skb_headlen(skb);
3980         unsigned int count = 0, i;
3981         unsigned int f;
3982         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3983
3984         i = tx_ring->next_to_use;
3985
3986         buffer_info = &tx_ring->buffer_info[i];
3987         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3988         buffer_info->length = hlen;
3989         /* set time_stamp *before* dma to help avoid a possible race */
3990         buffer_info->time_stamp = jiffies;
3991         buffer_info->next_to_watch = i;
3992         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3993                                           DMA_TO_DEVICE);
3994         if (dma_mapping_error(dev, buffer_info->dma))
3995                 goto dma_error;
3996
3997         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3998                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3999                 unsigned int len = frag->size;
4000
4001                 count++;
4002                 i++;
4003                 if (i == tx_ring->count)
4004                         i = 0;
4005
4006                 buffer_info = &tx_ring->buffer_info[i];
4007                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4008                 buffer_info->length = len;
4009                 buffer_info->time_stamp = jiffies;
4010                 buffer_info->next_to_watch = i;
4011                 buffer_info->mapped_as_page = true;
4012                 buffer_info->dma = dma_map_page(dev,
4013                                                 frag->page,
4014                                                 frag->page_offset,
4015                                                 len,
4016                                                 DMA_TO_DEVICE);
4017                 if (dma_mapping_error(dev, buffer_info->dma))
4018                         goto dma_error;
4019
4020         }
4021
4022         tx_ring->buffer_info[i].skb = skb;
4023         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4024         /* multiply data chunks by size of headers */
4025         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4026         tx_ring->buffer_info[i].gso_segs = gso_segs;
4027         tx_ring->buffer_info[first].next_to_watch = i;
4028
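        /* count covered only the frags; ++count accounts for the skb->data descriptor */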
4029         return ++count;
4030
4031 dma_error:
4032         dev_err(dev, "TX DMA map failed\n");
4033
4034         /* clear timestamp and dma mappings for failed buffer_info mapping */
4035         buffer_info->dma = 0;
4036         buffer_info->time_stamp = 0;
4037         buffer_info->length = 0;
4038         buffer_info->next_to_watch = 0;
4039         buffer_info->mapped_as_page = false;
4040
4041         /* clear timestamp and dma mappings for remaining portion of packet */
4042         while (count--) {
4043                 if (i == 0)
4044                         i = tx_ring->count;
4045                 i--;
4046                 buffer_info = &tx_ring->buffer_info[i];
4047                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4048         }
4049
4050         return 0;
4051 }
4052
4053 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4054                                     u32 tx_flags, int count, u32 paylen,
4055                                     u8 hdr_len)
4056 {
4057         union e1000_adv_tx_desc *tx_desc;
4058         struct igb_buffer *buffer_info;
4059         u32 olinfo_status = 0, cmd_type_len;
4060         unsigned int i = tx_ring->next_to_use;
4061
4062         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4063                         E1000_ADVTXD_DCMD_DEXT);
4064
4065         if (tx_flags & IGB_TX_FLAGS_VLAN)
4066                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4067
4068         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4069                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4070
4071         if (tx_flags & IGB_TX_FLAGS_TSO) {
4072                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4073
4074                 /* insert tcp checksum */
4075                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4076
4077                 /* insert ip checksum */
4078                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4079                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4080
4081         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4082                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4083         }
4084
4085         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4086             (tx_flags & (IGB_TX_FLAGS_CSUM |
4087                          IGB_TX_FLAGS_TSO |
4088                          IGB_TX_FLAGS_VLAN)))
4089                 olinfo_status |= tx_ring->reg_idx << 4;
4090
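        /* the PAYLEN field gets the frame length minus the bytes already
         * counted in hdr_len (non-zero only for TSO) */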
4091         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4092
4093         do {
4094                 buffer_info = &tx_ring->buffer_info[i];
4095                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4096                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4097                 tx_desc->read.cmd_type_len =
4098                         cpu_to_le32(cmd_type_len | buffer_info->length);
4099                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4100                 count--;
4101                 i++;
4102                 if (i == tx_ring->count)
4103                         i = 0;
4104         } while (count > 0);
4105
4106         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4107         /* Force memory writes to complete before letting h/w
4108          * know there are new descriptors to fetch.  (Only
4109          * applicable for weak-ordered memory model archs,
4110          * such as IA-64). */
4111         wmb();
4112
4113         tx_ring->next_to_use = i;
4114         writel(i, tx_ring->tail);
4115         /* we need this if more than one processor can write to our tail
4116          * at a time; it synchronizes IO on IA64/Altix systems */
4117         mmiowb();
4118 }
4119
4120 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4121 {
4122         struct net_device *netdev = tx_ring->netdev;
4123
4124         netif_stop_subqueue(netdev, tx_ring->queue_index);
4125
4126         /* Herbert's original patch had:
4127          *  smp_mb__after_netif_stop_queue();
4128          * but since that doesn't exist yet, just open code it. */
4129         smp_mb();
4130
4131         /* We need to check again in case another CPU has just
4132          * made room available. */
4133         if (igb_desc_unused(tx_ring) < size)
4134                 return -EBUSY;
4135
4136         /* A reprieve! */
4137         netif_wake_subqueue(netdev, tx_ring->queue_index);
4138
4139         u64_stats_update_begin(&tx_ring->tx_syncp2);
4140         tx_ring->tx_stats.restart_queue2++;
4141         u64_stats_update_end(&tx_ring->tx_syncp2);
4142
4143         return 0;
4144 }
4145
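/* fast path wrapper: only take __igb_maybe_stop_tx() when the ring does not
 * have enough free descriptors for the requested send */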
4146 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4147 {
4148         if (igb_desc_unused(tx_ring) >= size)
4149                 return 0;
4150         return __igb_maybe_stop_tx(tx_ring, size);
4151 }
4152
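/**
 * igb_xmit_frame_ring_adv - transmit an skb on a specific Tx ring
 * @skb: buffer to transmit
 * @tx_ring: ring the buffer will be queued on
 *
 * Reserves descriptors, sets up hardware timestamp, VLAN, TSO and checksum
 * offload flags, maps the buffer and posts the descriptors to hardware.
 **/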
4153 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4154                                     struct igb_ring *tx_ring)
4155 {
4156         int tso = 0, count;
4157         u32 tx_flags = 0;
4158         u16 first;
4159         u8 hdr_len = 0;
4160
4161         /* need: 1 descriptor per page,
4162          *       + 2 desc gap to keep tail from touching head,
4163          *       + 1 desc for skb->data,
4164          *       + 1 desc for context descriptor,
4165          * otherwise try next time */
4166         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4167                 /* this is a hard error */
4168                 return NETDEV_TX_BUSY;
4169         }
4170
4171         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4172                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4173                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4174         }
4175
4176         if (vlan_tx_tag_present(skb)) {
4177                 tx_flags |= IGB_TX_FLAGS_VLAN;
4178                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4179         }
4180
4181         if (skb->protocol == htons(ETH_P_IP))
4182                 tx_flags |= IGB_TX_FLAGS_IPV4;
4183
4184         first = tx_ring->next_to_use;
4185         if (skb_is_gso(skb)) {
4186                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4187
4188                 if (tso < 0) {
4189                         dev_kfree_skb_any(skb);
4190                         return NETDEV_TX_OK;
4191                 }
4192         }
4193
4194         if (tso)
4195                 tx_flags |= IGB_TX_FLAGS_TSO;
4196         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4197                  (skb->ip_summed == CHECKSUM_PARTIAL))
4198                 tx_flags |= IGB_TX_FLAGS_CSUM;
4199
4200         /*
4201          * count reflects descriptors mapped; if 0 or less, a mapping error
4202          * has occurred and we need to rewind the descriptor queue
4203          */
4204         count = igb_tx_map_adv(tx_ring, skb, first);
4205         if (!count) {
4206                 dev_kfree_skb_any(skb);
4207                 tx_ring->buffer_info[first].time_stamp = 0;
4208                 tx_ring->next_to_use = first;
4209                 return NETDEV_TX_OK;
4210         }
4211
4212         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4213
4214         /* Make sure there is space in the ring for the next send. */
4215         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4216
4217         return NETDEV_TX_OK;
4218 }
4219
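/**
 * igb_xmit_frame_adv - net_device transmit entry point
 * @skb: buffer to transmit
 * @netdev: network interface device structure
 *
 * Drops the frame if the adapter is going down or the skb is empty,
 * otherwise selects a Tx ring from the skb queue mapping and hands the
 * frame to igb_xmit_frame_ring_adv().
 **/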
4220 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4221                                       struct net_device *netdev)
4222 {
4223         struct igb_adapter *adapter = netdev_priv(netdev);
4224         struct igb_ring *tx_ring;
4225         int r_idx = 0;
4226
4227         if (test_bit(__IGB_DOWN, &adapter->state)) {
4228                 dev_kfree_skb_any(skb);
4229                 return NETDEV_TX_OK;
4230         }
4231
4232         if (skb->len <= 0) {
4233                 dev_kfree_skb_any(skb);
4234                 return NETDEV_TX_OK;
4235         }
4236
4237         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4238         tx_ring = adapter->multi_tx_table[r_idx];
4239
4240         /* This goes back to the question of how to logically map a tx queue
4241          * to a flow.  Right now, performance is slightly degraded when
4242          * using multiple tx queues.  If the stack breaks away from a
4243          * single qdisc implementation, we can look at this again. */
4244         return igb_xmit_frame_ring_adv(skb, tx_ring);
4245 }
4246
4247 /**
4248  * igb_tx_timeout - Respond to a Tx Hang
4249  * @netdev: network interface device structure
4250  **/
4251 static void igb_tx_timeout(struct net_device *netdev)
4252 {
4253         struct igb_adapter *adapter = netdev_priv(netdev);
4254         struct e1000_hw *hw = &adapter->hw;
4255
4256         /* Do the reset outside of interrupt context */
4257         adapter->tx_timeout_count++;
4258
4259         if (hw->mac.type == e1000_82580)
4260                 hw->dev_spec._82575.global_device_reset = true;
4261
4262         schedule_work(&adapter->reset_task);
4263         wr32(E1000_EICS,
4264              (adapter->eims_enable_mask & ~adapter->eims_other));
4265 }
4266
4267 static void igb_reset_task(struct work_struct *work)
4268 {
4269         struct igb_adapter *adapter;
4270         adapter = container_of(work, struct igb_adapter, reset_task);
4271
4272         igb_dump(adapter);
4273         netdev_err(adapter->netdev, "Reset adapter\n");
4274         igb_reinit_locked(adapter);
4275 }
4276
4277 /**
4278  * igb_get_stats64 - Get System Network Statistics
4279  * @netdev: network interface device structure
4280  * @stats: rtnl_link_stats64 pointer
4281  *
4282  **/
4283 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4284                                                  struct rtnl_link_stats64 *stats)
4285 {
4286         struct igb_adapter *adapter = netdev_priv(netdev);
4287
4288         spin_lock(&adapter->stats64_lock);
4289         igb_update_stats(adapter, &adapter->stats64);
4290         memcpy(stats, &adapter->stats64, sizeof(*stats));
4291         spin_unlock(&adapter->stats64_lock);
4292
4293         return stats;
4294 }
4295
4296 /**
4297  * igb_change_mtu - Change the Maximum Transfer Unit
4298  * @netdev: network interface device structure
4299  * @new_mtu: new value for maximum frame size
4300  *
4301  * Returns 0 on success, negative on failure
4302  **/
4303 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4304 {
4305         struct igb_adapter *adapter = netdev_priv(netdev);
4306         struct pci_dev *pdev = adapter->pdev;
4307         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4308         u32 rx_buffer_len, i;
4309
4310         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4311                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4312                 return -EINVAL;
4313         }
4314
4315         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4316                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4317                 return -EINVAL;
4318         }
4319
4320         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4321                 msleep(1);
4322
4323         /* igb_down has a dependency on max_frame_size */
4324         adapter->max_frame_size = max_frame;
4325
4326         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4327          * means we reserve 2 more, this pushes us to allocate from the next
4328          * larger slab size.
4329          * i.e. RXBUFFER_2048 --> size-4096 slab
4330          */
4331
4332         if (adapter->hw.mac.type == e1000_82580)
4333                 max_frame += IGB_TS_HDR_LEN;
4334
4335         if (max_frame <= IGB_RXBUFFER_1024)
4336                 rx_buffer_len = IGB_RXBUFFER_1024;
4337         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4338                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4339         else
4340                 rx_buffer_len = IGB_RXBUFFER_128;
4341
4342         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4343              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4344                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4345
4346         if ((adapter->hw.mac.type == e1000_82580) &&
4347             (rx_buffer_len == IGB_RXBUFFER_128))
4348                 rx_buffer_len += IGB_RXBUFFER_64;
4349
4350         if (netif_running(netdev))
4351                 igb_down(adapter);
4352
4353         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4354                  netdev->mtu, new_mtu);
4355         netdev->mtu = new_mtu;
4356
4357         for (i = 0; i < adapter->num_rx_queues; i++)
4358                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4359
4360         if (netif_running(netdev))
4361                 igb_up(adapter);
4362         else
4363                 igb_reset(adapter);
4364
4365         clear_bit(__IGB_RESETTING, &adapter->state);
4366
4367         return 0;
4368 }
4369
4370 /**
4371  * igb_update_stats - Update the board statistics counters
4372  * @adapter: board private structure
4373  **/
4374
4375 void igb_update_stats(struct igb_adapter *adapter,
4376                       struct rtnl_link_stats64 *net_stats)
4377 {
4378         struct e1000_hw *hw = &adapter->hw;
4379         struct pci_dev *pdev = adapter->pdev;
4380         u32 reg, mpc;
4381         u16 phy_tmp;
4382         int i;
4383         u64 bytes, packets;
4384         unsigned int start;
4385         u64 _bytes, _packets;
4386
4387 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4388
4389         /*
4390          * Prevent stats update while adapter is being reset, or if the pci
4391          * connection is down.
4392          */
4393         if (adapter->link_speed == 0)
4394                 return;
4395         if (pci_channel_offline(pdev))
4396                 return;
4397
4398         bytes = 0;
4399         packets = 0;
4400         for (i = 0; i < adapter->num_rx_queues; i++) {
4401                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4402                 struct igb_ring *ring = adapter->rx_ring[i];
4403
4404                 ring->rx_stats.drops += rqdpc_tmp;
4405                 net_stats->rx_fifo_errors += rqdpc_tmp;
4406
4407                 do {
4408                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4409                         _bytes = ring->rx_stats.bytes;
4410                         _packets = ring->rx_stats.packets;
4411                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4412                 bytes += _bytes;
4413                 packets += _packets;
4414         }
4415
4416         net_stats->rx_bytes = bytes;
4417         net_stats->rx_packets = packets;
4418
4419         bytes = 0;
4420         packets = 0;
4421         for (i = 0; i < adapter->num_tx_queues; i++) {
4422                 struct igb_ring *ring = adapter->tx_ring[i];
4423                 do {
4424                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4425                         _bytes = ring->tx_stats.bytes;
4426                         _packets = ring->tx_stats.packets;
4427                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4428                 bytes += _bytes;
4429                 packets += _packets;
4430         }
4431         net_stats->tx_bytes = bytes;
4432         net_stats->tx_packets = packets;
4433
4434         /* read stats registers */
4435         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4436         adapter->stats.gprc += rd32(E1000_GPRC);
4437         adapter->stats.gorc += rd32(E1000_GORCL);
4438         rd32(E1000_GORCH); /* clear GORCL */
4439         adapter->stats.bprc += rd32(E1000_BPRC);
4440         adapter->stats.mprc += rd32(E1000_MPRC);
4441         adapter->stats.roc += rd32(E1000_ROC);
4442
4443         adapter->stats.prc64 += rd32(E1000_PRC64);
4444         adapter->stats.prc127 += rd32(E1000_PRC127);
4445         adapter->stats.prc255 += rd32(E1000_PRC255);
4446         adapter->stats.prc511 += rd32(E1000_PRC511);
4447         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4448         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4449         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4450         adapter->stats.sec += rd32(E1000_SEC);
4451
4452         mpc = rd32(E1000_MPC);
4453         adapter->stats.mpc += mpc;
4454         net_stats->rx_fifo_errors += mpc;
4455         adapter->stats.scc += rd32(E1000_SCC);
4456         adapter->stats.ecol += rd32(E1000_ECOL);
4457         adapter->stats.mcc += rd32(E1000_MCC);
4458         adapter->stats.latecol += rd32(E1000_LATECOL);
4459         adapter->stats.dc += rd32(E1000_DC);
4460         adapter->stats.rlec += rd32(E1000_RLEC);
4461         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4462         adapter->stats.xontxc += rd32(E1000_XONTXC);
4463         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4464         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4465         adapter->stats.fcruc += rd32(E1000_FCRUC);
4466         adapter->stats.gptc += rd32(E1000_GPTC);
4467         adapter->stats.gotc += rd32(E1000_GOTCL);
4468         rd32(E1000_GOTCH); /* clear GOTCL */
4469         adapter->stats.rnbc += rd32(E1000_RNBC);
4470         adapter->stats.ruc += rd32(E1000_RUC);
4471         adapter->stats.rfc += rd32(E1000_RFC);
4472         adapter->stats.rjc += rd32(E1000_RJC);
4473         adapter->stats.tor += rd32(E1000_TORH);
4474         adapter->stats.tot += rd32(E1000_TOTH);
4475         adapter->stats.tpr += rd32(E1000_TPR);
4476
4477         adapter->stats.ptc64 += rd32(E1000_PTC64);
4478         adapter->stats.ptc127 += rd32(E1000_PTC127);
4479         adapter->stats.ptc255 += rd32(E1000_PTC255);
4480         adapter->stats.ptc511 += rd32(E1000_PTC511);
4481         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4482         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4483
4484         adapter->stats.mptc += rd32(E1000_MPTC);
4485         adapter->stats.bptc += rd32(E1000_BPTC);
4486
4487         adapter->stats.tpt += rd32(E1000_TPT);
4488         adapter->stats.colc += rd32(E1000_COLC);
4489
4490         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4491         /* read internal phy specific stats */
4492         reg = rd32(E1000_CTRL_EXT);
4493         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4494                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4495                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4496         }
4497
4498         adapter->stats.tsctc += rd32(E1000_TSCTC);
4499         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4500
4501         adapter->stats.iac += rd32(E1000_IAC);
4502         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4503         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4504         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4505         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4506         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4507         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4508         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4509         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4510
4511         /* Fill out the OS statistics structure */
4512         net_stats->multicast = adapter->stats.mprc;
4513         net_stats->collisions = adapter->stats.colc;
4514
4515         /* Rx Errors */
4516
4517         /* RLEC on some newer hardware can be incorrect so build
4518          * our own version based on RUC and ROC */
4519         net_stats->rx_errors = adapter->stats.rxerrc +
4520                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4521                 adapter->stats.ruc + adapter->stats.roc +
4522                 adapter->stats.cexterr;
4523         net_stats->rx_length_errors = adapter->stats.ruc +
4524                                       adapter->stats.roc;
4525         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4526         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4527         net_stats->rx_missed_errors = adapter->stats.mpc;
4528
4529         /* Tx Errors */
4530         net_stats->tx_errors = adapter->stats.ecol +
4531                                adapter->stats.latecol;
4532         net_stats->tx_aborted_errors = adapter->stats.ecol;
4533         net_stats->tx_window_errors = adapter->stats.latecol;
4534         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4535
4536         /* Tx Dropped needs to be maintained elsewhere */
4537
4538         /* Phy Stats */
4539         if (hw->phy.media_type == e1000_media_type_copper) {
4540                 if ((adapter->link_speed == SPEED_1000) &&
4541                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4542                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4543                         adapter->phy_stats.idle_errors += phy_tmp;
4544                 }
4545         }
4546
4547         /* Management Stats */
4548         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4549         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4550         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4551 }
4552
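/**
 * igb_msix_other - MSI-X handler for non-queue (other cause) interrupts
 * @irq: interrupt number
 * @data: pointer to the adapter structure
 *
 * Handles device reset requests, DMA out-of-sync events, VF mailbox
 * messages and link status changes, then re-enables the handled causes.
 **/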
4553 static irqreturn_t igb_msix_other(int irq, void *data)
4554 {
4555         struct igb_adapter *adapter = data;
4556         struct e1000_hw *hw = &adapter->hw;
4557         u32 icr = rd32(E1000_ICR);
4558         /* reading ICR causes bit 31 of EICR to be cleared */
4559
4560         if (icr & E1000_ICR_DRSTA)
4561                 schedule_work(&adapter->reset_task);
4562
4563         if (icr & E1000_ICR_DOUTSYNC) {
4564                 /* HW is reporting DMA is out of sync */
4565                 adapter->stats.doosync++;
4566                 /* The DMA Out of Sync is also an indication of a spoof event
4567                  * in IOV mode. Check the Wrong VM Behavior register to
4568                  * see if it is really a spoof event. */
4569                 igb_check_wvbr(adapter);
4570         }
4571
4572         /* Check for a mailbox event */
4573         if (icr & E1000_ICR_VMMB)
4574                 igb_msg_task(adapter);
4575
4576         if (icr & E1000_ICR_LSC) {
4577                 hw->mac.get_link_status = 1;
4578                 /* guard against interrupt when we're going down */
4579                 if (!test_bit(__IGB_DOWN, &adapter->state))
4580                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4581         }
4582
4583         if (adapter->vfs_allocated_count)
4584                 wr32(E1000_IMS, E1000_IMS_LSC |
4585                                 E1000_IMS_VMMB |
4586                                 E1000_IMS_DOUTSYNC);
4587         else
4588                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4589         wr32(E1000_EIMS, adapter->eims_other);
4590
4591         return IRQ_HANDLED;
4592 }
4593
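/* write the interrupt throttle value calculated for this q_vector to its
 * ITR register; the register encoding differs between 82575 and later parts */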
4594 static void igb_write_itr(struct igb_q_vector *q_vector)
4595 {
4596         struct igb_adapter *adapter = q_vector->adapter;
4597         u32 itr_val = q_vector->itr_val & 0x7FFC;
4598
4599         if (!q_vector->set_itr)
4600                 return;
4601
4602         if (!itr_val)
4603                 itr_val = 0x4;
4604
4605         if (adapter->hw.mac.type == e1000_82575)
4606                 itr_val |= itr_val << 16;
4607         else
4608                 itr_val |= 0x8000000;
4609
4610         writel(itr_val, q_vector->itr_register);
4611         q_vector->set_itr = 0;
4612 }
4613
4614 static irqreturn_t igb_msix_ring(int irq, void *data)
4615 {
4616         struct igb_q_vector *q_vector = data;
4617
4618         /* Write the ITR value calculated from the previous interrupt. */
4619         igb_write_itr(q_vector);
4620
4621         napi_schedule(&q_vector->napi);
4622
4623         return IRQ_HANDLED;
4624 }
4625
4626 #ifdef CONFIG_IGB_DCA
4627 static void igb_update_dca(struct igb_q_vector *q_vector)
4628 {
4629         struct igb_adapter *adapter = q_vector->adapter;
4630         struct e1000_hw *hw = &adapter->hw;
4631         int cpu = get_cpu();
4632
4633         if (q_vector->cpu == cpu)
4634                 goto out_no_update;
4635
4636         if (q_vector->tx_ring) {
4637                 int q = q_vector->tx_ring->reg_idx;
4638                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4639                 if (hw->mac.type == e1000_82575) {
4640                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4641                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4642                 } else {
4643                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4644                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4645                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4646                 }
4647                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4648                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4649         }
4650         if (q_vector->rx_ring) {
4651                 int q = q_vector->rx_ring->reg_idx;
4652                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4653                 if (hw->mac.type == e1000_82575) {
4654                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4655                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4656                 } else {
4657                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4658                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4659                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4660                 }
4661                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4662                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4663                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4664                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4665         }
4666         q_vector->cpu = cpu;
4667 out_no_update:
4668         put_cpu();
4669 }
4670
4671 static void igb_setup_dca(struct igb_adapter *adapter)
4672 {
4673         struct e1000_hw *hw = &adapter->hw;
4674         int i;
4675
4676         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4677                 return;
4678
4679         /* Always use CB2 mode, difference is masked in the CB driver. */
4680         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4681
4682         for (i = 0; i < adapter->num_q_vectors; i++) {
4683                 adapter->q_vector[i]->cpu = -1;
4684                 igb_update_dca(adapter->q_vector[i]);
4685         }
4686 }
4687
4688 static int __igb_notify_dca(struct device *dev, void *data)
4689 {
4690         struct net_device *netdev = dev_get_drvdata(dev);
4691         struct igb_adapter *adapter = netdev_priv(netdev);
4692         struct pci_dev *pdev = adapter->pdev;
4693         struct e1000_hw *hw = &adapter->hw;
4694         unsigned long event = *(unsigned long *)data;
4695
4696         switch (event) {
4697         case DCA_PROVIDER_ADD:
4698                 /* if already enabled, don't do it again */
4699                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4700                         break;
4701                 if (dca_add_requester(dev) == 0) {
4702                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4703                         dev_info(&pdev->dev, "DCA enabled\n");
4704                         igb_setup_dca(adapter);
4705                         break;
4706                 }
4707                 /* Fall Through since DCA is disabled. */
4708         case DCA_PROVIDER_REMOVE:
4709                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4710                         /* without this a class_device is left
4711                          * hanging around in the sysfs model */
4712                         dca_remove_requester(dev);
4713                         dev_info(&pdev->dev, "DCA disabled\n");
4714                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4715                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4716                 }
4717                 break;
4718         }
4719
4720         return 0;
4721 }
4722
4723 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4724                           void *p)
4725 {
4726         int ret_val;
4727
4728         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4729                                          __igb_notify_dca);
4730
4731         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4732 }
4733 #endif /* CONFIG_IGB_DCA */
4734
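/* notify every allocated VF via the mailbox; the CTS bit is only included
 * for VFs that have completed their reset handshake */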
4735 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4736 {
4737         struct e1000_hw *hw = &adapter->hw;
4738         u32 ping;
4739         int i;
4740
4741         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4742                 ping = E1000_PF_CONTROL_MSG;
4743                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4744                         ping |= E1000_VT_MSGTYPE_CTS;
4745                 igb_write_mbx(hw, &ping, 1, i);
4746         }
4747 }
4748
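/* handle a VF request to change its promiscuous settings by reprogramming
 * its VMOLR register; multicast promiscuous is honored directly, and when
 * it is cleared any stored multicast hashes are written back to the MTA
 * (or MPME is kept if more than 30 hashes are in use) */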
4749 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4750 {
4751         struct e1000_hw *hw = &adapter->hw;
4752         u32 vmolr = rd32(E1000_VMOLR(vf));
4753         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4754
4755         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4756                             IGB_VF_FLAG_MULTI_PROMISC);
4757         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4758
4759         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4760                 vmolr |= E1000_VMOLR_MPME;
4761                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4762                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4763         } else {
4764                 /*
4765                  * if we have hashes and we are clearing a multicast promisc
4766                  * flag we need to write the hashes to the MTA as this step
4767                  * was previously skipped
4768                  */
4769                 if (vf_data->num_vf_mc_hashes > 30) {
4770                         vmolr |= E1000_VMOLR_MPME;
4771                 } else if (vf_data->num_vf_mc_hashes) {
4772                         int j;
4773                         vmolr |= E1000_VMOLR_ROMPE;
4774                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4775                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4776                 }
4777         }
4778
4779         wr32(E1000_VMOLR(vf), vmolr);
4780
4781         /* there are flags left unprocessed, likely not supported */
4782         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4783                 return -EINVAL;
4784
4785         return 0;
4786
4787 }
4788
4789 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4790                                   u32 *msgbuf, u32 vf)
4791 {
4792         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4793         u16 *hash_list = (u16 *)&msgbuf[1];
4794         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4795         int i;
4796
4797         /* salt away the number of multicast addresses assigned
4798          * to this VF so they can be restored when the PF multicast
4799          * list changes
4800          */
4801         vf_data->num_vf_mc_hashes = n;
4802
4803         /* only up to 30 hash values supported */
4804         if (n > 30)
4805                 n = 30;
4806
4807         /* store the hashes for later use */
4808         for (i = 0; i < n; i++)
4809                 vf_data->vf_mc_hashes[i] = hash_list[i];
4810
4811         /* Flush and reset the mta with the new values */
4812         igb_set_rx_mode(adapter->netdev);
4813
4814         return 0;
4815 }
4816
4817 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4818 {
4819         struct e1000_hw *hw = &adapter->hw;
4820         struct vf_data_storage *vf_data;
4821         int i, j;
4822
4823         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4824                 u32 vmolr = rd32(E1000_VMOLR(i));
4825                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4826
4827                 vf_data = &adapter->vf_data[i];
4828
4829                 if ((vf_data->num_vf_mc_hashes > 30) ||
4830                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4831                         vmolr |= E1000_VMOLR_MPME;
4832                 } else if (vf_data->num_vf_mc_hashes) {
4833                         vmolr |= E1000_VMOLR_ROMPE;
4834                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4835                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4836                 }
4837                 wr32(E1000_VMOLR(i), vmolr);
4838         }
4839 }
4840
4841 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4842 {
4843         struct e1000_hw *hw = &adapter->hw;
4844         u32 pool_mask, reg, vid;
4845         int i;
4846
4847         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4848
4849         /* Find the vlan filter for this id */
4850         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4851                 reg = rd32(E1000_VLVF(i));
4852
4853                 /* remove the vf from the pool */
4854                 reg &= ~pool_mask;
4855
4856                 /* if pool is empty then remove entry from vfta */
4857                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4858                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4859                         vid = reg & E1000_VLVF_VLANID_MASK;
4860                         reg = 0;
4861                         igb_vfta_set(hw, vid, false);
4862                 }
4863
4864                 wr32(E1000_VLVF(i), reg);
4865         }
4866
4867         adapter->vf_data[vf].vlans_enabled = 0;
4868 }
4869
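/**
 * igb_vlvf_set - add or remove a pool/VF from a VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the VF to the pool, false to remove it
 * @vf: pool/VF index
 *
 * Maintains the shared VLVF/VFTA filter state and grows or shrinks the
 * VF's receive packet length limit (RLPML) by 4 bytes to account for the
 * VLAN tag.
 **/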
4870 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4871 {
4872         struct e1000_hw *hw = &adapter->hw;
4873         u32 reg, i;
4874
4875         /* The vlvf table only exists on 82576 hardware and newer */
4876         if (hw->mac.type < e1000_82576)
4877                 return -1;
4878
4879         /* we only need to do this if VMDq is enabled */
4880         if (!adapter->vfs_allocated_count)
4881                 return -1;
4882
4883         /* Find the vlan filter for this id */
4884         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4885                 reg = rd32(E1000_VLVF(i));
4886                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4887                     vid == (reg & E1000_VLVF_VLANID_MASK))
4888                         break;
4889         }
4890
4891         if (add) {
4892                 if (i == E1000_VLVF_ARRAY_SIZE) {
4893                         /* Did not find a matching VLAN ID entry that was
4894                          * enabled.  Search for a free filter entry, i.e.
4895                          * one without the enable bit set
4896                          */
4897                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4898                                 reg = rd32(E1000_VLVF(i));
4899                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4900                                         break;
4901                         }
4902                 }
4903                 if (i < E1000_VLVF_ARRAY_SIZE) {
4904                         /* Found an enabled/available entry */
4905                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4906
4907                         /* if !enabled we need to set this up in vfta */
4908                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4909                                 /* add VID to filter table */
4910                                 igb_vfta_set(hw, vid, true);
4911                                 reg |= E1000_VLVF_VLANID_ENABLE;
4912                         }
4913                         reg &= ~E1000_VLVF_VLANID_MASK;
4914                         reg |= vid;
4915                         wr32(E1000_VLVF(i), reg);
4916
4917                         /* do not modify RLPML for PF devices */
4918                         if (vf >= adapter->vfs_allocated_count)
4919                                 return 0;
4920
4921                         if (!adapter->vf_data[vf].vlans_enabled) {
4922                                 u32 size;
4923                                 reg = rd32(E1000_VMOLR(vf));
4924                                 size = reg & E1000_VMOLR_RLPML_MASK;
4925                                 size += 4;
4926                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4927                                 reg |= size;
4928                                 wr32(E1000_VMOLR(vf), reg);
4929                         }
4930
4931                         adapter->vf_data[vf].vlans_enabled++;
4932                         return 0;
4933                 }
4934         } else {
4935                 if (i < E1000_VLVF_ARRAY_SIZE) {
4936                         /* remove vf from the pool */
4937                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4938                         /* if pool is empty then remove entry from vfta */
4939                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4940                                 reg = 0;
4941                                 igb_vfta_set(hw, vid, false);
4942                         }
4943                         wr32(E1000_VLVF(i), reg);
4944
4945                         /* do not modify RLPML for PF devices */
4946                         if (vf >= adapter->vfs_allocated_count)
4947                                 return 0;
4948
4949                         adapter->vf_data[vf].vlans_enabled--;
4950                         if (!adapter->vf_data[vf].vlans_enabled) {
4951                                 u32 size;
4952                                 reg = rd32(E1000_VMOLR(vf));
4953                                 size = reg & E1000_VMOLR_RLPML_MASK;
4954                                 size -= 4;
4955                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4956                                 reg |= size;
4957                                 wr32(E1000_VMOLR(vf), reg);
4958                         }
4959                 }
4960         }
4961         return 0;
4962 }
4963
4964 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4965 {
4966         struct e1000_hw *hw = &adapter->hw;
4967
4968         if (vid)
4969                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4970         else
4971                 wr32(E1000_VMVIR(vf), 0);
4972 }
4973
4974 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4975                                int vf, u16 vlan, u8 qos)
4976 {
4977         int err = 0;
4978         struct igb_adapter *adapter = netdev_priv(netdev);
4979
4980         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4981                 return -EINVAL;
4982         if (vlan || qos) {
4983                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4984                 if (err)
4985                         goto out;
4986                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4987                 igb_set_vmolr(adapter, vf, !vlan);
4988                 adapter->vf_data[vf].pf_vlan = vlan;
4989                 adapter->vf_data[vf].pf_qos = qos;
4990                 dev_info(&adapter->pdev->dev,
4991                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4992                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4993                         dev_warn(&adapter->pdev->dev,
4994                                  "The VF VLAN has been set,"
4995                                  " but the PF device is not up.\n");
4996                         dev_warn(&adapter->pdev->dev,
4997                                  "Bring the PF device up before"
4998                                  " attempting to use the VF device.\n");
4999                 }
5000         } else {
5001                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5002                                    false, vf);
5003                 igb_set_vmvir(adapter, vlan, vf);
5004                 igb_set_vmolr(adapter, vf, true);
5005                 adapter->vf_data[vf].pf_vlan = 0;
5006                 adapter->vf_data[vf].pf_qos = 0;
5007         }
5008 out:
5009         return err;
5010 }
5011
5012 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5013 {
5014         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5015         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5016
5017         return igb_vlvf_set(adapter, vid, add, vf);
5018 }
5019
5020 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5021 {
5022         /* clear flags - except flag that indicates PF has set the MAC */
5023         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5024         adapter->vf_data[vf].last_nack = jiffies;
5025
5026         /* reset offloads to defaults */
5027         igb_set_vmolr(adapter, vf, true);
5028
5029         /* reset vlans for device */
5030         igb_clear_vf_vfta(adapter, vf);
5031         if (adapter->vf_data[vf].pf_vlan)
5032                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5033                                     adapter->vf_data[vf].pf_vlan,
5034                                     adapter->vf_data[vf].pf_qos);
5035         else
5036                 igb_clear_vf_vfta(adapter, vf);
5037
5038         /* reset multicast table array for vf */
5039         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5040
5041         /* Flush and reset the mta with the new values */
5042         igb_set_rx_mode(adapter->netdev);
5043 }
5044
5045 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5046 {
5047         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5048
5049         /* generate a new mac address as we were hotplug removed/added */
5050         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5051                 random_ether_addr(vf_mac);
5052
5053         /* process remaining reset events */
5054         igb_vf_reset(adapter, vf);
5055 }
5056
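/* respond to a VF reset request: reset the VF state, program its MAC
 * address into a RAR entry, enable its Tx/Rx, and ACK with the MAC */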
5057 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5058 {
5059         struct e1000_hw *hw = &adapter->hw;
5060         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5061         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5062         u32 reg, msgbuf[3];
5063         u8 *addr = (u8 *)(&msgbuf[1]);
5064
5065         /* process all the same items cleared in a function level reset */
5066         igb_vf_reset(adapter, vf);
5067
5068         /* set vf mac address */
5069         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5070
5071         /* enable transmit and receive for vf */
5072         reg = rd32(E1000_VFTE);
5073         wr32(E1000_VFTE, reg | (1 << vf));
5074         reg = rd32(E1000_VFRE);
5075         wr32(E1000_VFRE, reg | (1 << vf));
5076
5077         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5078
5079         /* reply to reset with ack and vf mac address */
5080         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5081         memcpy(addr, vf_mac, 6);
5082         igb_write_mbx(hw, msgbuf, 3, vf);
5083 }
5084
5085 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5086 {
5087         /*
5088          * The VF MAC Address is stored in a packed array of bytes
5089          * starting at the second 32 bit word of the msg array
5090          */
5091         unsigned char *addr = (unsigned char *)&msg[1];
5092         int err = -1;
5093
5094         if (is_valid_ether_addr(addr))
5095                 err = igb_set_vf_mac(adapter, vf, addr);
5096
5097         return err;
5098 }
5099
5100 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5101 {
5102         struct e1000_hw *hw = &adapter->hw;
5103         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5104         u32 msg = E1000_VT_MSGTYPE_NACK;
5105
5106         /* if device isn't clear to send it shouldn't be reading either */
5107         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5108             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5109                 igb_write_mbx(hw, &msg, 1, vf);
5110                 vf_data->last_nack = jiffies;
5111         }
5112 }
5113
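/**
 * igb_rcv_msg_from_vf - process a single mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index the message came from
 *
 * Reads the mailbox, dispatches MAC, promiscuous, multicast, LPE and VLAN
 * requests, and replies with an ACK or NACK depending on the result.
 * Configuration requests are refused until the VF has completed a reset.
 **/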
5114 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5115 {
5116         struct pci_dev *pdev = adapter->pdev;
5117         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5118         struct e1000_hw *hw = &adapter->hw;
5119         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5120         s32 retval;
5121
5122         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5123
5124         if (retval) {
5125                 /* if receive failed revoke VF CTS stats and restart init */
5126                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5127                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5128                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5129                         return;
5130                 goto out;
5131         }
5132
5133         /* this is a message we already processed, do nothing */
5134         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5135                 return;
5136
5137         /*
5138          * until the vf completes a reset it should not be
5139          * allowed to start any configuration.
5140          */
5141
5142         if (msgbuf[0] == E1000_VF_RESET) {
5143                 igb_vf_reset_msg(adapter, vf);
5144                 return;
5145         }
5146
5147         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5148                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5149                         return;
5150                 retval = -1;
5151                 goto out;
5152         }
5153
5154         switch ((msgbuf[0] & 0xFFFF)) {
5155         case E1000_VF_SET_MAC_ADDR:
5156                 retval = -EINVAL;
5157                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5158                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5159                 else
5160                         dev_warn(&pdev->dev,
5161                                  "VF %d attempted to override administratively "
5162                                  "set MAC address\nReload the VF driver to "
5163                                  "resume operations\n", vf);
5164                 break;
5165         case E1000_VF_SET_PROMISC:
5166                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5167                 break;
5168         case E1000_VF_SET_MULTICAST:
5169                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5170                 break;
5171         case E1000_VF_SET_LPE:
5172                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5173                 break;
5174         case E1000_VF_SET_VLAN:
5175                 retval = -1;
5176                 if (vf_data->pf_vlan)
5177                         dev_warn(&pdev->dev,
5178                                  "VF %d attempted to override administratively "
5179                                  "set VLAN tag\nReload the VF driver to "
5180                                  "resume operations\n", vf);
5181                 else
5182                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5183                 break;
5184         default:
5185                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5186                 retval = -1;
5187                 break;
5188         }
5189
5190         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5191 out:
5192         /* notify the VF of the results of what it sent us */
5193         if (retval)
5194                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5195         else
5196                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5197
5198         igb_write_mbx(hw, msgbuf, 1, vf);
5199 }
5200
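/* walk all allocated VFs and service any pending reset requests, mailbox
 * messages and acknowledgements (see igb_msix_other()) */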
5201 static void igb_msg_task(struct igb_adapter *adapter)
5202 {
5203         struct e1000_hw *hw = &adapter->hw;
5204         u32 vf;
5205
5206         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5207                 /* process any reset requests */
5208                 if (!igb_check_for_rst(hw, vf))
5209                         igb_vf_reset_event(adapter, vf);
5210
5211                 /* process any messages pending */
5212                 if (!igb_check_for_msg(hw, vf))
5213                         igb_rcv_msg_from_vf(adapter, vf);
5214
5215                 /* process any acks */
5216                 if (!igb_check_for_ack(hw, vf))
5217                         igb_rcv_ack_from_vf(adapter, vf);
5218         }
5219 }
5220
5221 /**
5222  *  igb_set_uta - Set unicast filter table address
5223  *  @adapter: board private structure
5224  *
5225  *  The unicast table address is a register array of 32-bit registers.
5226  *  The table is meant to be used in a way similar to how the MTA is used,
5227  *  however due to certain limitations in the hardware it is necessary to
5228  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5229  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5230  **/
5231 static void igb_set_uta(struct igb_adapter *adapter)
5232 {
5233         struct e1000_hw *hw = &adapter->hw;
5234         int i;
5235
5236         /* The UTA table only exists on 82576 hardware and newer */
5237         if (hw->mac.type < e1000_82576)
5238                 return;
5239
5240         /* we only need to do this if VMDq is enabled */
5241         if (!adapter->vfs_allocated_count)
5242                 return;
5243
5244         for (i = 0; i < hw->mac.uta_reg_count; i++)
5245                 array_wr32(E1000_UTA, i, ~0);
5246 }
5247
5248 /**
5249  * igb_intr_msi - Interrupt Handler
5250  * @irq: interrupt number
5251  * @data: pointer to a network interface device structure
5252  **/
5253 static irqreturn_t igb_intr_msi(int irq, void *data)
5254 {
5255         struct igb_adapter *adapter = data;
5256         struct igb_q_vector *q_vector = adapter->q_vector[0];
5257         struct e1000_hw *hw = &adapter->hw;
5258         /* read ICR disables interrupts using IAM */
5259         u32 icr = rd32(E1000_ICR);
5260
5261         igb_write_itr(q_vector);
5262
5263         if (icr & E1000_ICR_DRSTA)
5264                 schedule_work(&adapter->reset_task);
5265
5266         if (icr & E1000_ICR_DOUTSYNC) {
5267                 /* HW is reporting DMA is out of sync */
5268                 adapter->stats.doosync++;
5269         }
5270
5271         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5272                 hw->mac.get_link_status = 1;
5273                 if (!test_bit(__IGB_DOWN, &adapter->state))
5274                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5275         }
5276
5277         napi_schedule(&q_vector->napi);
5278
5279         return IRQ_HANDLED;
5280 }
5281
5282 /**
5283  * igb_intr - Legacy Interrupt Handler
5284  * @irq: interrupt number
5285  * @data: pointer to a network interface device structure
5286  **/
5287 static irqreturn_t igb_intr(int irq, void *data)
5288 {
5289         struct igb_adapter *adapter = data;
5290         struct igb_q_vector *q_vector = adapter->q_vector[0];
5291         struct e1000_hw *hw = &adapter->hw;
5292         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5293          * need for the IMC write */
5294         u32 icr = rd32(E1000_ICR);
5295         if (!icr)
5296                 return IRQ_NONE;  /* Not our interrupt */
5297
5298         igb_write_itr(q_vector);
5299
5300         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5301          * not set, then the adapter didn't send an interrupt */
5302         if (!(icr & E1000_ICR_INT_ASSERTED))
5303                 return IRQ_NONE;
5304
5305         if (icr & E1000_ICR_DRSTA)
5306                 schedule_work(&adapter->reset_task);
5307
5308         if (icr & E1000_ICR_DOUTSYNC) {
5309                 /* HW is reporting DMA is out of sync */
5310                 adapter->stats.doosync++;
5311         }
5312
5313         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5314                 hw->mac.get_link_status = 1;
5315                 /* guard against interrupt when we're going down */
5316                 if (!test_bit(__IGB_DOWN, &adapter->state))
5317                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5318         }
5319
5320         napi_schedule(&q_vector->napi);
5321
5322         return IRQ_HANDLED;
5323 }
5324
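/* re-enable interrupts for this q_vector at the end of a NAPI poll,
 * updating the adaptive ITR value first if dynamic moderation is enabled */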
5325 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5326 {
5327         struct igb_adapter *adapter = q_vector->adapter;
5328         struct e1000_hw *hw = &adapter->hw;
5329
5330         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5331             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5332                 if (!adapter->msix_entries)
5333                         igb_set_itr(adapter);
5334                 else
5335                         igb_update_ring_itr(q_vector);
5336         }
5337
5338         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5339                 if (adapter->msix_entries)
5340                         wr32(E1000_EIMS, q_vector->eims_value);
5341                 else
5342                         igb_irq_enable(adapter);
5343         }
5344 }
5345
5346 /**
5347  * igb_poll - NAPI Rx polling callback
5348  * @napi: napi polling structure
5349  * @budget: count of how many packets we should handle
5350  **/
5351 static int igb_poll(struct napi_struct *napi, int budget)
5352 {
5353         struct igb_q_vector *q_vector = container_of(napi,
5354                                                      struct igb_q_vector,
5355                                                      napi);
5356         int tx_clean_complete = 1, work_done = 0;
5357
5358 #ifdef CONFIG_IGB_DCA
5359         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5360                 igb_update_dca(q_vector);
5361 #endif
5362         if (q_vector->tx_ring)
5363                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5364
5365         if (q_vector->rx_ring)
5366                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5367
5368         if (!tx_clean_complete)
5369                 work_done = budget;
5370
5371         /* If not enough Rx work done, exit the polling mode */
5372         if (work_done < budget) {
5373                 napi_complete(napi);
5374                 igb_ring_irq_enable(q_vector);
5375         }
5376
5377         return work_done;
5378 }
5379
5380 /**
5381  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5382  * @adapter: board private structure
5383  * @shhwtstamps: timestamp structure to update
5384  * @regval: unsigned 64bit system time value.
5385  *
5386  * We need to convert the system time value stored in the RX/TXSTMP registers
5387  * into a hwtstamp which can be used by the upper level timestamping functions
5388  */
5389 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5390                                    struct skb_shared_hwtstamps *shhwtstamps,
5391                                    u64 regval)
5392 {
5393         u64 ns;
5394
5395         /*
5396          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift this up by
5397          * 24 bits to match the clock shift we set up earlier.
5398          */
5399         if (adapter->hw.mac.type == e1000_82580)
5400                 regval <<= IGB_82580_TSYNC_SHIFT;
5401
5402         ns = timecounter_cyc2time(&adapter->clock, regval);
5403         timecompare_update(&adapter->compare, ns);
5404         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5405         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5406         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5407 }
5408
5409 /**
5410  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5411  * @q_vector: pointer to q_vector containing needed info
5412  * @buffer: pointer to igb_buffer structure
5413  *
5414  * If we were asked to do hardware stamping and such a time stamp is
5415  * available, then it must have been for this skb here because we
5416  * allow only one such packet into the queue.
5417  */
5418 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5419 {
5420         struct igb_adapter *adapter = q_vector->adapter;
5421         struct e1000_hw *hw = &adapter->hw;
5422         struct skb_shared_hwtstamps shhwtstamps;
5423         u64 regval;
5424
5425         /* if skb does not support hw timestamp or TX stamp not valid exit */
5426         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5427             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5428                 return;
5429
5430         regval = rd32(E1000_TXSTMPL);
5431         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5432
5433         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5434         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5435 }
5436
5437 /**
5438  * igb_clean_tx_irq - Reclaim resources after transmit completes
5439  * @q_vector: pointer to q_vector containing needed info
5440  * returns true if ring is completely cleaned
5441  **/
5442 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5443 {
5444         struct igb_adapter *adapter = q_vector->adapter;
5445         struct igb_ring *tx_ring = q_vector->tx_ring;
5446         struct net_device *netdev = tx_ring->netdev;
5447         struct e1000_hw *hw = &adapter->hw;
5448         struct igb_buffer *buffer_info;
5449         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5450         unsigned int total_bytes = 0, total_packets = 0;
5451         unsigned int i, eop, count = 0;
5452         bool cleaned = false;
5453
5454         i = tx_ring->next_to_clean;
5455         eop = tx_ring->buffer_info[i].next_to_watch;
5456         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5457
5458         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5459                (count < tx_ring->count)) {
5460                 rmb();  /* read buffer_info after eop_desc status */
5461                 for (cleaned = false; !cleaned; count++) {
5462                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5463                         buffer_info = &tx_ring->buffer_info[i];
5464                         cleaned = (i == eop);
5465
5466                         if (buffer_info->skb) {
5467                                 total_bytes += buffer_info->bytecount;
5468                                 /* gso_segs is currently only valid for tcp */
5469                                 total_packets += buffer_info->gso_segs;
5470                                 igb_tx_hwtstamp(q_vector, buffer_info);
5471                         }
5472
5473                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5474                         tx_desc->wb.status = 0;
5475
5476                         i++;
5477                         if (i == tx_ring->count)
5478                                 i = 0;
5479                 }
5480                 eop = tx_ring->buffer_info[i].next_to_watch;
5481                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5482         }
5483
5484         tx_ring->next_to_clean = i;
5485
5486         if (unlikely(count &&
5487                      netif_carrier_ok(netdev) &&
5488                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5489                 /* Make sure that anybody stopping the queue after this
5490                  * sees the new next_to_clean.
5491                  */
5492                 smp_mb();
5493                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5494                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5495                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5496
5497                         u64_stats_update_begin(&tx_ring->tx_syncp);
5498                         tx_ring->tx_stats.restart_queue++;
5499                         u64_stats_update_end(&tx_ring->tx_syncp);
5500                 }
5501         }
5502
5503         if (tx_ring->detect_tx_hung) {
5504                 /* Detect a transmit hang in hardware; this serializes the
5505                  * check with the clearing of time_stamp and movement of i */
5506                 tx_ring->detect_tx_hung = false;
5507                 if (tx_ring->buffer_info[i].time_stamp &&
5508                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5509                                (adapter->tx_timeout_factor * HZ)) &&
5510                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5511
5512                         /* detected Tx unit hang */
5513                         dev_err(tx_ring->dev,
5514                                 "Detected Tx Unit Hang\n"
5515                                 "  Tx Queue             <%d>\n"
5516                                 "  TDH                  <%x>\n"
5517                                 "  TDT                  <%x>\n"
5518                                 "  next_to_use          <%x>\n"
5519                                 "  next_to_clean        <%x>\n"
5520                                 "buffer_info[next_to_clean]\n"
5521                                 "  time_stamp           <%lx>\n"
5522                                 "  next_to_watch        <%x>\n"
5523                                 "  jiffies              <%lx>\n"
5524                                 "  desc.status          <%x>\n",
5525                                 tx_ring->queue_index,
5526                                 readl(tx_ring->head),
5527                                 readl(tx_ring->tail),
5528                                 tx_ring->next_to_use,
5529                                 tx_ring->next_to_clean,
5530                                 tx_ring->buffer_info[eop].time_stamp,
5531                                 eop,
5532                                 jiffies,
5533                                 eop_desc->wb.status);
5534                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5535                 }
5536         }
5537         tx_ring->total_bytes += total_bytes;
5538         tx_ring->total_packets += total_packets;
5539         u64_stats_update_begin(&tx_ring->tx_syncp);
5540         tx_ring->tx_stats.bytes += total_bytes;
5541         tx_ring->tx_stats.packets += total_packets;
5542         u64_stats_update_end(&tx_ring->tx_syncp);
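             /* cleaning a full ring's worth of descriptors means there may be
              * more tx work pending, so tell the caller to keep polling */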
5543         return count < tx_ring->count;
5544 }
5545
5546 /**
5547  * igb_receive_skb - helper function to handle rx indications
5548  * @q_vector: structure containing interrupt and ring information
5549  * @skb: packet to send up
5550  * @vlan_tag: vlan tag for packet
5551  **/
5552 static void igb_receive_skb(struct igb_q_vector *q_vector,
5553                             struct sk_buff *skb,
5554                             u16 vlan_tag)
5555 {
5556         struct igb_adapter *adapter = q_vector->adapter;
5557
5558         if (vlan_tag && adapter->vlgrp)
5559                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5560                                  vlan_tag, skb);
5561         else
5562                 napi_gro_receive(&q_vector->napi, skb);
5563 }
5564
5565 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5566                                        u32 status_err, struct sk_buff *skb)
5567 {
5568         skb_checksum_none_assert(skb);
5569
5570         /* skip checksum if the Ignore Checksum bit is set or Rx checksum is disabled via ethtool */
5571         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5572              (status_err & E1000_RXD_STAT_IXSM))
5573                 return;
5574
5575         /* TCP/UDP checksum error bit is set */
5576         if (status_err &
5577             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5578                 /*
5579                  * work around an erratum with SCTP packets where the TCPE
5580                  * (aka L4E) bit is set incorrectly on 64 byte (60 byte w/o crc)
5581                  * packets; count the error and let the stack verify the crc32c
5582                  */
5583                 if ((skb->len == 60) &&
5584                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5585                         u64_stats_update_begin(&ring->rx_syncp);
5586                         ring->rx_stats.csum_err++;
5587                         u64_stats_update_end(&ring->rx_syncp);
5588                 }
5589                 /* let the stack verify checksum errors */
5590                 return;
5591         }
5592         /* It must be a TCP or UDP packet with a valid checksum */
5593         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5594                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5595
5596         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5597 }
5598
5599 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5600                                    struct sk_buff *skb)
5601 {
5602         struct igb_adapter *adapter = q_vector->adapter;
5603         struct e1000_hw *hw = &adapter->hw;
5604         u64 regval;
5605
5606         /*
5607          * If this bit is set, then the RX registers contain the time stamp. No
5608          * other packet will be time stamped until we read these registers, so
5609          * read the registers to make them available again. Because only one
5610          * packet can be time stamped at a time, we know that the register
5611          * values must belong to this one here and therefore we don't need to
5612          * compare any of the additional attributes stored for it.
5613          *
5614          * If nothing went wrong, then it should have a shared tx_flags that we
5615          * can turn into a skb_shared_hwtstamps.
5616          */
5617         if (staterr & E1000_RXDADV_STAT_TSIP) {
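                     /* the time stamp was prepended to the packet data (TSIP) */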
5618                 u32 *stamp = (u32 *)skb->data;
5619                 regval = le32_to_cpu(*(stamp + 2));
5620                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5621                 skb_pull(skb, IGB_TS_HDR_LEN);
5622         } else {
5623                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5624                         return;
5625
5626                 regval = rd32(E1000_RXSTMPL);
5627                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5628         }
5629
5630         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5631 }
5632 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5633                                union e1000_adv_rx_desc *rx_desc)
5634 {
5635         /* HW will not DMA in data larger than the given buffer, even if it
5636          * parses the (NFS, of course) header to be larger.  In that case, it
5637          * fills the header buffer and spills the rest into the page.
5638          */
5639         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5640                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5641         if (hlen > rx_ring->rx_buffer_len)
5642                 hlen = rx_ring->rx_buffer_len;
5643         return hlen;
5644 }
5645
5646 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5647                                  int *work_done, int budget)
5648 {
5649         struct igb_ring *rx_ring = q_vector->rx_ring;
5650         struct net_device *netdev = rx_ring->netdev;
5651         struct device *dev = rx_ring->dev;
5652         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5653         struct igb_buffer *buffer_info, *next_buffer;
5654         struct sk_buff *skb;
5655         bool cleaned = false;
5656         int cleaned_count = 0;
5657         int current_node = numa_node_id();
5658         unsigned int total_bytes = 0, total_packets = 0;
5659         unsigned int i;
5660         u32 staterr;
5661         u16 length;
5662         u16 vlan_tag;
5663
5664         i = rx_ring->next_to_clean;
5665         buffer_info = &rx_ring->buffer_info[i];
5666         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5667         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5668
5669         while (staterr & E1000_RXD_STAT_DD) {
5670                 if (*work_done >= budget)
5671                         break;
5672                 (*work_done)++;
5673                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5674
5675                 skb = buffer_info->skb;
5676                 prefetch(skb->data - NET_IP_ALIGN);
5677                 buffer_info->skb = NULL;
5678
5679                 i++;
5680                 if (i == rx_ring->count)
5681                         i = 0;
5682
5683                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5684                 prefetch(next_rxd);
5685                 next_buffer = &rx_ring->buffer_info[i];
5686
5687                 length = le16_to_cpu(rx_desc->wb.upper.length);
5688                 cleaned = true;
5689                 cleaned_count++;
5690
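                     /*
                      * With large rx buffers the entire frame was DMA'd into
                      * the skb; with packet split only the header is placed
                      * here and the rest is attached below from the half page.
                      */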
5691                 if (buffer_info->dma) {
5692                         dma_unmap_single(dev, buffer_info->dma,
5693                                          rx_ring->rx_buffer_len,
5694                                          DMA_FROM_DEVICE);
5695                         buffer_info->dma = 0;
5696                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5697                                 skb_put(skb, length);
5698                                 goto send_up;
5699                         }
5700                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5701                 }
5702
5703                 if (length) {
5704                         dma_unmap_page(dev, buffer_info->page_dma,
5705                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5706                         buffer_info->page_dma = 0;
5707
5708                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5709                                                 buffer_info->page,
5710                                                 buffer_info->page_offset,
5711                                                 length);
5712
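                             /*
                              * Reuse the half page only if no one else holds a
                              * reference and it is local to this NUMA node;
                              * otherwise a new page is allocated on refill.
                              */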
5713                         if ((page_count(buffer_info->page) != 1) ||
5714                             (page_to_nid(buffer_info->page) != current_node))
5715                                 buffer_info->page = NULL;
5716                         else
5717                                 get_page(buffer_info->page);
5718
5719                         skb->len += length;
5720                         skb->data_len += length;
5721                         skb->truesize += length;
5722                 }
5723
5724                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5725                         buffer_info->skb = next_buffer->skb;
5726                         buffer_info->dma = next_buffer->dma;
5727                         next_buffer->skb = skb;
5728                         next_buffer->dma = 0;
5729                         goto next_desc;
5730                 }
5731 send_up:
5732                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5733                         dev_kfree_skb_irq(skb);
5734                         goto next_desc;
5735                 }
5736
5737                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5738                         igb_rx_hwtstamp(q_vector, staterr, skb);
5739                 total_bytes += skb->len;
5740                 total_packets++;
5741
5742                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5743
5744                 skb->protocol = eth_type_trans(skb, netdev);
5745                 skb_record_rx_queue(skb, rx_ring->queue_index);
5746
5747                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5748                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5749
5750                 igb_receive_skb(q_vector, skb, vlan_tag);
5751
5752 next_desc:
5753                 rx_desc->wb.upper.status_error = 0;
5754
5755                 /* return some buffers to hardware, one at a time is too slow */
5756                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5757                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5758                         cleaned_count = 0;
5759                 }
5760
5761                 /* use prefetched values */
5762                 rx_desc = next_rxd;
5763                 buffer_info = next_buffer;
5764                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5765         }
5766
5767         rx_ring->next_to_clean = i;
5768         cleaned_count = igb_desc_unused(rx_ring);
5769
5770         if (cleaned_count)
5771                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5772
5773         rx_ring->total_packets += total_packets;
5774         rx_ring->total_bytes += total_bytes;
5775         u64_stats_update_begin(&rx_ring->rx_syncp);
5776         rx_ring->rx_stats.packets += total_packets;
5777         rx_ring->rx_stats.bytes += total_bytes;
5778         u64_stats_update_end(&rx_ring->rx_syncp);
5779         return cleaned;
5780 }
5781
5782 /**
5783  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5784  * @rx_ring: rx descriptor ring in which to replace the used buffers
5785  **/
5786 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5787 {
5788         struct net_device *netdev = rx_ring->netdev;
5789         union e1000_adv_rx_desc *rx_desc;
5790         struct igb_buffer *buffer_info;
5791         struct sk_buff *skb;
5792         unsigned int i;
5793         int bufsz;
5794
5795         i = rx_ring->next_to_use;
5796         buffer_info = &rx_ring->buffer_info[i];
5797
5798         bufsz = rx_ring->rx_buffer_len;
5799
5800         while (cleaned_count--) {
5801                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5802
5803                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5804                         if (!buffer_info->page) {
5805                                 buffer_info->page = netdev_alloc_page(netdev);
5806                                 if (unlikely(!buffer_info->page)) {
5807                                         u64_stats_update_begin(&rx_ring->rx_syncp);
5808                                         rx_ring->rx_stats.alloc_failed++;
5809                                         u64_stats_update_end(&rx_ring->rx_syncp);
5810                                         goto no_buffers;
5811                                 }
5812                                 buffer_info->page_offset = 0;
5813                         } else {
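                                     /* flip to the unused half of the page */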
5814                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5815                         }
5816                         buffer_info->page_dma =
5817                                 dma_map_page(rx_ring->dev, buffer_info->page,
5818                                              buffer_info->page_offset,
5819                                              PAGE_SIZE / 2,
5820                                              DMA_FROM_DEVICE);
5821                         if (dma_mapping_error(rx_ring->dev,
5822                                               buffer_info->page_dma)) {
5823                                 buffer_info->page_dma = 0;
5824                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5825                                 rx_ring->rx_stats.alloc_failed++;
5826                                 u64_stats_update_end(&rx_ring->rx_syncp);
5827                                 goto no_buffers;
5828                         }
5829                 }
5830
5831                 skb = buffer_info->skb;
5832                 if (!skb) {
5833                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5834                         if (unlikely(!skb)) {
5835                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5836                                 rx_ring->rx_stats.alloc_failed++;
5837                                 u64_stats_update_end(&rx_ring->rx_syncp);
5838                                 goto no_buffers;
5839                         }
5840
5841                         buffer_info->skb = skb;
5842                 }
5843                 if (!buffer_info->dma) {
5844                         buffer_info->dma = dma_map_single(rx_ring->dev,
5845                                                           skb->data,
5846                                                           bufsz,
5847                                                           DMA_FROM_DEVICE);
5848                         if (dma_mapping_error(rx_ring->dev,
5849                                               buffer_info->dma)) {
5850                                 buffer_info->dma = 0;
5851                                 u64_stats_update_begin(&rx_ring->rx_syncp);
5852                                 rx_ring->rx_stats.alloc_failed++;
5853                                 u64_stats_update_end(&rx_ring->rx_syncp);
5854                                 goto no_buffers;
5855                         }
5856                 }
5857                 /* Refresh the desc even if buffer_addrs didn't change because
5858                  * each write-back erases this info. */
5859                 if (bufsz < IGB_RXBUFFER_1024) {
5860                         rx_desc->read.pkt_addr =
5861                              cpu_to_le64(buffer_info->page_dma);
5862                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5863                 } else {
5864                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5865                         rx_desc->read.hdr_addr = 0;
5866                 }
5867
5868                 i++;
5869                 if (i == rx_ring->count)
5870                         i = 0;
5871                 buffer_info = &rx_ring->buffer_info[i];
5872         }
5873
5874 no_buffers:
5875         if (rx_ring->next_to_use != i) {
5876                 rx_ring->next_to_use = i;
5877                 if (i == 0)
5878                         i = (rx_ring->count - 1);
5879                 else
5880                         i--;
5881
5882                 /* Force memory writes to complete before letting h/w
5883                  * know there are new descriptors to fetch.  (Only
5884                  * applicable for weak-ordered memory model archs,
5885                  * such as IA-64). */
5886                 wmb();
5887                 writel(i, rx_ring->tail);
5888         }
5889 }
5890
5891 /**
5892  * igb_mii_ioctl - handle MII register ioctls for copper PHYs
5893  * @netdev: network interface device structure
5894  * @ifr: pointer to the ifreq carrying the MII data
5895  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5896  **/
5897 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5898 {
5899         struct igb_adapter *adapter = netdev_priv(netdev);
5900         struct mii_ioctl_data *data = if_mii(ifr);
5901
5902         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5903                 return -EOPNOTSUPP;
5904
5905         switch (cmd) {
5906         case SIOCGMIIPHY:
5907                 data->phy_id = adapter->hw.phy.addr;
5908                 break;
5909         case SIOCGMIIREG:
5910                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5911                                      &data->val_out))
5912                         return -EIO;
5913                 break;
5914         case SIOCSMIIREG:
5915         default:
5916                 return -EOPNOTSUPP;
5917         }
5918         return 0;
5919 }
5920
5921 /**
5922  * igb_hwtstamp_ioctl - control hardware time stamping
5923  * @netdev: network interface device structure
5924  * @ifr: pointer to the ifreq carrying a struct hwtstamp_config
5925  * @cmd: ioctl command (SIOCSHWTSTAMP)
5926  *
5927  * Outgoing time stamping can be enabled and disabled. Play nice and
5928  * disable it when requested, although it shouldn't cause any overhead
5929  * when no packet needs it. At most one packet in the queue may be
5930  * marked for time stamping, otherwise it would be impossible to tell
5931  * for sure to which packet the hardware time stamp belongs.
5932  *
5933  * Incoming time stamping has to be configured via the hardware
5934  * filters. Not all combinations are supported, in particular event
5935  * type has to be specified. Matching the kind of event packet is
5936  * not supported, with the exception of "all V2 events regardless of
5937  * level 2 or 4".
5938  *
5939  **/
5940 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5941                               struct ifreq *ifr, int cmd)
5942 {
5943         struct igb_adapter *adapter = netdev_priv(netdev);
5944         struct e1000_hw *hw = &adapter->hw;
5945         struct hwtstamp_config config;
5946         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5947         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5948         u32 tsync_rx_cfg = 0;
5949         bool is_l4 = false;
5950         bool is_l2 = false;
5951         u32 regval;
5952
5953         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5954                 return -EFAULT;
5955
5956         /* reserved for future extensions */
5957         if (config.flags)
5958                 return -EINVAL;
5959
5960         switch (config.tx_type) {
5961         case HWTSTAMP_TX_OFF:
5962                 tsync_tx_ctl = 0;
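                     /* fall through */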
5963         case HWTSTAMP_TX_ON:
5964                 break;
5965         default:
5966                 return -ERANGE;
5967         }
5968
5969         switch (config.rx_filter) {
5970         case HWTSTAMP_FILTER_NONE:
5971                 tsync_rx_ctl = 0;
5972                 break;
5973         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5974         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5975         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5976         case HWTSTAMP_FILTER_ALL:
5977                 /*
5978                  * register TSYNCRXCFG must be set, therefore it is not
5979                  * possible to time stamp both Sync and Delay_Req messages
5980                  * => fall back to time stamping all packets
5981                  */
5982                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5983                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5984                 break;
5985         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5986                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5987                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5988                 is_l4 = true;
5989                 break;
5990         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5991                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5992                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5993                 is_l4 = true;
5994                 break;
5995         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5996         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5997                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5998                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5999                 is_l2 = true;
6000                 is_l4 = true;
6001                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6002                 break;
6003         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6004         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6005                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6006                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6007                 is_l2 = true;
6008                 is_l4 = true;
6009                 config.rx_filter = HWTSTAMP_FILTER_SOME;
6010                 break;
6011         case HWTSTAMP_FILTER_PTP_V2_EVENT:
6012         case HWTSTAMP_FILTER_PTP_V2_SYNC:
6013         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6014                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6015                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6016                 is_l2 = true;
6017                 break;
6018         default:
6019                 return -ERANGE;
6020         }
6021
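             /* hardware time stamping is not supported on the 82575 */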
6022         if (hw->mac.type == e1000_82575) {
6023                 if (tsync_rx_ctl | tsync_tx_ctl)
6024                         return -EINVAL;
6025                 return 0;
6026         }
6027
6028         /*
6029          * Per-packet timestamping only works if all packets are
6030          * timestamped, so enable timestamping in all packets as
6031          * long as one rx filter was configured.
6032          */
6033         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6034                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6035                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6036         }
6037
6038         /* enable/disable TX */
6039         regval = rd32(E1000_TSYNCTXCTL);
6040         regval &= ~E1000_TSYNCTXCTL_ENABLED;
6041         regval |= tsync_tx_ctl;
6042         wr32(E1000_TSYNCTXCTL, regval);
6043
6044         /* enable/disable RX */
6045         regval = rd32(E1000_TSYNCRXCTL);
6046         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6047         regval |= tsync_rx_ctl;
6048         wr32(E1000_TSYNCRXCTL, regval);
6049
6050         /* define which PTP packets are time stamped */
6051         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6052
6053         /* define ethertype filter for timestamped packets */
6054         if (is_l2)
6055                 wr32(E1000_ETQF(3),
6056                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6057                                  E1000_ETQF_1588 | /* enable timestamping */
6058                                  ETH_P_1588));     /* 1588 eth protocol type */
6059         else
6060                 wr32(E1000_ETQF(3), 0);
6061
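/* UDP destination port used by PTP (IEEE 1588) event messages */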
6062 #define PTP_PORT 319
6063         /* L4 Queue Filter[3]: filter by destination port and protocol */
6064         if (is_l4) {
6065                 u32 ftqf = (IPPROTO_UDP /* UDP */
6066                         | E1000_FTQF_VF_BP /* VF not compared */
6067                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6068                         | E1000_FTQF_MASK); /* mask all inputs */
6069                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6070
6071                 wr32(E1000_IMIR(3), htons(PTP_PORT));
6072                 wr32(E1000_IMIREXT(3),
6073                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6074                 if (hw->mac.type == e1000_82576) {
6075                         /* enable source port check */
6076                         wr32(E1000_SPQF(3), htons(PTP_PORT));
6077                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6078                 }
6079                 wr32(E1000_FTQF(3), ftqf);
6080         } else {
6081                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6082         }
6083         wrfl();
6084
6085         adapter->hwtstamp_config = config;
6086
6087         /* clear TX/RX time stamp registers, just to be sure */
6088         regval = rd32(E1000_TXSTMPH);
6089         regval = rd32(E1000_RXSTMPH);
6090
6091         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6092                 -EFAULT : 0;
6093 }
6094
6095 /**
6096  * igb_ioctl - dispatch device specific ioctls
6097  * @netdev: network interface device structure
6098  * @ifr: pointer to the ioctl interface request
6099  * @cmd: ioctl command
6100  **/
6101 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6102 {
6103         switch (cmd) {
6104         case SIOCGMIIPHY:
6105         case SIOCGMIIREG:
6106         case SIOCSMIIREG:
6107                 return igb_mii_ioctl(netdev, ifr, cmd);
6108         case SIOCSHWTSTAMP:
6109                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6110         default:
6111                 return -EOPNOTSUPP;
6112         }
6113 }
6114
6115 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6116 {
6117         struct igb_adapter *adapter = hw->back;
6118         u16 cap_offset;
6119
6120         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6121         if (!cap_offset)
6122                 return -E1000_ERR_CONFIG;
6123
6124         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6125
6126         return 0;
6127 }
6128
6129 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6130 {
6131         struct igb_adapter *adapter = hw->back;
6132         u16 cap_offset;
6133
6134         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6135         if (!cap_offset)
6136                 return -E1000_ERR_CONFIG;
6137
6138         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6139
6140         return 0;
6141 }
6142
6143 static void igb_vlan_rx_register(struct net_device *netdev,
6144                                  struct vlan_group *grp)
6145 {
6146         struct igb_adapter *adapter = netdev_priv(netdev);
6147         struct e1000_hw *hw = &adapter->hw;
6148         u32 ctrl, rctl;
6149
6150         igb_irq_disable(adapter);
6151         adapter->vlgrp = grp;
6152
6153         if (grp) {
6154                 /* enable VLAN tag insert/strip */
6155                 ctrl = rd32(E1000_CTRL);
6156                 ctrl |= E1000_CTRL_VME;
6157                 wr32(E1000_CTRL, ctrl);
6158
6159                 /* Disable CFI check */
6160                 rctl = rd32(E1000_RCTL);
6161                 rctl &= ~E1000_RCTL_CFIEN;
6162                 wr32(E1000_RCTL, rctl);
6163         } else {
6164                 /* disable VLAN tag insert/strip */
6165                 ctrl = rd32(E1000_CTRL);
6166                 ctrl &= ~E1000_CTRL_VME;
6167                 wr32(E1000_CTRL, ctrl);
6168         }
6169
6170         igb_rlpml_set(adapter);
6171
6172         if (!test_bit(__IGB_DOWN, &adapter->state))
6173                 igb_irq_enable(adapter);
6174 }
6175
6176 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6177 {
6178         struct igb_adapter *adapter = netdev_priv(netdev);
6179         struct e1000_hw *hw = &adapter->hw;
6180         int pf_id = adapter->vfs_allocated_count;
6181
6182         /* attempt to add filter to vlvf array */
6183         igb_vlvf_set(adapter, vid, true, pf_id);
6184
6185         /* add the filter since PF can receive vlans w/o entry in vlvf */
6186         igb_vfta_set(hw, vid, true);
6187 }
6188
6189 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6190 {
6191         struct igb_adapter *adapter = netdev_priv(netdev);
6192         struct e1000_hw *hw = &adapter->hw;
6193         int pf_id = adapter->vfs_allocated_count;
6194         s32 err;
6195
6196         igb_irq_disable(adapter);
6197         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6198
6199         if (!test_bit(__IGB_DOWN, &adapter->state))
6200                 igb_irq_enable(adapter);
6201
6202         /* remove vlan from VLVF table array */
6203         err = igb_vlvf_set(adapter, vid, false, pf_id);
6204
6205         /* if vid was not present in VLVF just remove it from table */
6206         if (err)
6207                 igb_vfta_set(hw, vid, false);
6208 }
6209
6210 static void igb_restore_vlan(struct igb_adapter *adapter)
6211 {
6212         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6213
6214         if (adapter->vlgrp) {
6215                 u16 vid;
6216                 for (vid = 0; vid < VLAN_N_VID; vid++) {
6217                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6218                                 continue;
6219                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6220                 }
6221         }
6222 }
6223
6224 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6225 {
6226         struct pci_dev *pdev = adapter->pdev;
6227         struct e1000_mac_info *mac = &adapter->hw.mac;
6228
6229         mac->autoneg = 0;
6230
6231         /* Fiber NICs only allow 1000 Mbps full duplex */
6232         if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6233                 spddplx != (SPEED_1000 + DUPLEX_FULL)) {
6234                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6235                 return -EINVAL;
6236         }
6237
6238         switch (spddplx) {
6239         case SPEED_10 + DUPLEX_HALF:
6240                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6241                 break;
6242         case SPEED_10 + DUPLEX_FULL:
6243                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6244                 break;
6245         case SPEED_100 + DUPLEX_HALF:
6246                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6247                 break;
6248         case SPEED_100 + DUPLEX_FULL:
6249                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6250                 break;
6251         case SPEED_1000 + DUPLEX_FULL:
6252                 mac->autoneg = 1;
6253                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6254                 break;
6255         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6256         default:
6257                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6258                 return -EINVAL;
6259         }
6260         return 0;
6261 }
6262
6263 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6264 {
6265         struct net_device *netdev = pci_get_drvdata(pdev);
6266         struct igb_adapter *adapter = netdev_priv(netdev);
6267         struct e1000_hw *hw = &adapter->hw;
6268         u32 ctrl, rctl, status;
6269         u32 wufc = adapter->wol;
6270 #ifdef CONFIG_PM
6271         int retval = 0;
6272 #endif
6273
6274         netif_device_detach(netdev);
6275
6276         if (netif_running(netdev))
6277                 igb_close(netdev);
6278
6279         igb_clear_interrupt_scheme(adapter);
6280
6281 #ifdef CONFIG_PM
6282         retval = pci_save_state(pdev);
6283         if (retval)
6284                 return retval;
6285 #endif
6286
6287         status = rd32(E1000_STATUS);
6288         if (status & E1000_STATUS_LU)
6289                 wufc &= ~E1000_WUFC_LNKC;
6290
6291         if (wufc) {
6292                 igb_setup_rctl(adapter);
6293                 igb_set_rx_mode(netdev);
6294
6295                 /* turn on all-multi mode if wake on multicast is enabled */
6296                 if (wufc & E1000_WUFC_MC) {
6297                         rctl = rd32(E1000_RCTL);
6298                         rctl |= E1000_RCTL_MPE;
6299                         wr32(E1000_RCTL, rctl);
6300                 }
6301
6302                 ctrl = rd32(E1000_CTRL);
6303                 /* advertise wake from D3Cold */
6304                 #define E1000_CTRL_ADVD3WUC 0x00100000
6305                 /* phy power management enable */
6306                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6307                 ctrl |= E1000_CTRL_ADVD3WUC;
6308                 wr32(E1000_CTRL, ctrl);
6309
6310                 /* Allow time for pending master requests to run */
6311                 igb_disable_pcie_master(hw);
6312
6313                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6314                 wr32(E1000_WUFC, wufc);
6315         } else {
6316                 wr32(E1000_WUC, 0);
6317                 wr32(E1000_WUFC, 0);
6318         }
6319
6320         *enable_wake = wufc || adapter->en_mng_pt;
6321         if (!*enable_wake)
6322                 igb_power_down_link(adapter);
6323         else
6324                 igb_power_up_link(adapter);
6325
6326         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6327          * would have already happened in close and is redundant. */
6328         igb_release_hw_control(adapter);
6329
6330         pci_disable_device(pdev);
6331
6332         return 0;
6333 }
6334
6335 #ifdef CONFIG_PM
6336 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6337 {
6338         int retval;
6339         bool wake;
6340
6341         retval = __igb_shutdown(pdev, &wake);
6342         if (retval)
6343                 return retval;
6344
6345         if (wake) {
6346                 pci_prepare_to_sleep(pdev);
6347         } else {
6348                 pci_wake_from_d3(pdev, false);
6349                 pci_set_power_state(pdev, PCI_D3hot);
6350         }
6351
6352         return 0;
6353 }
6354
6355 static int igb_resume(struct pci_dev *pdev)
6356 {
6357         struct net_device *netdev = pci_get_drvdata(pdev);
6358         struct igb_adapter *adapter = netdev_priv(netdev);
6359         struct e1000_hw *hw = &adapter->hw;
6360         u32 err;
6361
6362         pci_set_power_state(pdev, PCI_D0);
6363         pci_restore_state(pdev);
6364         pci_save_state(pdev);
6365
6366         err = pci_enable_device_mem(pdev);
6367         if (err) {
6368                 dev_err(&pdev->dev,
6369                         "igb: Cannot enable PCI device from suspend\n");
6370                 return err;
6371         }
6372         pci_set_master(pdev);
6373
6374         pci_enable_wake(pdev, PCI_D3hot, 0);
6375         pci_enable_wake(pdev, PCI_D3cold, 0);
6376
6377         if (igb_init_interrupt_scheme(adapter)) {
6378                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6379                 return -ENOMEM;
6380         }
6381
6382         igb_reset(adapter);
6383
6384         /* let the f/w know that the h/w is now under the control of the
6385          * driver. */
6386         igb_get_hw_control(adapter);
6387
6388         wr32(E1000_WUS, ~0);
6389
6390         if (netif_running(netdev)) {
6391                 err = igb_open(netdev);
6392                 if (err)
6393                         return err;
6394         }
6395
6396         netif_device_attach(netdev);
6397
6398         return 0;
6399 }
6400 #endif
6401
6402 static void igb_shutdown(struct pci_dev *pdev)
6403 {
6404         bool wake;
6405
6406         __igb_shutdown(pdev, &wake);
6407
6408         if (system_state == SYSTEM_POWER_OFF) {
6409                 pci_wake_from_d3(pdev, wake);
6410                 pci_set_power_state(pdev, PCI_D3hot);
6411         }
6412 }
6413
6414 #ifdef CONFIG_NET_POLL_CONTROLLER
6415 /*
6416  * Polling 'interrupt' - used by things like netconsole to send skbs
6417  * without having to re-enable interrupts. It's not called while
6418  * the interrupt routine is executing.
6419  */
6420 static void igb_netpoll(struct net_device *netdev)
6421 {
6422         struct igb_adapter *adapter = netdev_priv(netdev);
6423         struct e1000_hw *hw = &adapter->hw;
6424         int i;
6425
6426         if (!adapter->msix_entries) {
6427                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6428                 igb_irq_disable(adapter);
6429                 napi_schedule(&q_vector->napi);
6430                 return;
6431         }
6432
6433         for (i = 0; i < adapter->num_q_vectors; i++) {
6434                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6435                 wr32(E1000_EIMC, q_vector->eims_value);
6436                 napi_schedule(&q_vector->napi);
6437         }
6438 }
6439 #endif /* CONFIG_NET_POLL_CONTROLLER */
6440
6441 /**
6442  * igb_io_error_detected - called when PCI error is detected
6443  * @pdev: Pointer to PCI device
6444  * @state: The current pci connection state
6445  *
6446  * This function is called after a PCI bus error affecting
6447  * this device has been detected.
6448  */
6449 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6450                                               pci_channel_state_t state)
6451 {
6452         struct net_device *netdev = pci_get_drvdata(pdev);
6453         struct igb_adapter *adapter = netdev_priv(netdev);
6454
6455         netif_device_detach(netdev);
6456
6457         if (state == pci_channel_io_perm_failure)
6458                 return PCI_ERS_RESULT_DISCONNECT;
6459
6460         if (netif_running(netdev))
6461                 igb_down(adapter);
6462         pci_disable_device(pdev);
6463
6464         /* Request a slot reset. */
6465         return PCI_ERS_RESULT_NEED_RESET;
6466 }
6467
6468 /**
6469  * igb_io_slot_reset - called after the pci bus has been reset.
6470  * @pdev: Pointer to PCI device
6471  *
6472  * Restart the card from scratch, as if from a cold-boot. Implementation
6473  * resembles the first-half of the igb_resume routine.
6474  */
6475 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6476 {
6477         struct net_device *netdev = pci_get_drvdata(pdev);
6478         struct igb_adapter *adapter = netdev_priv(netdev);
6479         struct e1000_hw *hw = &adapter->hw;
6480         pci_ers_result_t result;
6481         int err;
6482
6483         if (pci_enable_device_mem(pdev)) {
6484                 dev_err(&pdev->dev,
6485                         "Cannot re-enable PCI device after reset.\n");
6486                 result = PCI_ERS_RESULT_DISCONNECT;
6487         } else {
6488                 pci_set_master(pdev);
6489                 pci_restore_state(pdev);
6490                 pci_save_state(pdev);
6491
6492                 pci_enable_wake(pdev, PCI_D3hot, 0);
6493                 pci_enable_wake(pdev, PCI_D3cold, 0);
6494
6495                 igb_reset(adapter);
6496                 wr32(E1000_WUS, ~0);
6497                 result = PCI_ERS_RESULT_RECOVERED;
6498         }
6499
6500         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6501         if (err) {
6502                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6503                         "failed 0x%0x\n", err);
6504                 /* non-fatal, continue */
6505         }
6506
6507         return result;
6508 }
6509
6510 /**
6511  * igb_io_resume - called when traffic can start flowing again.
6512  * @pdev: Pointer to PCI device
6513  *
6514  * This callback is called when the error recovery driver tells us that
6515  * its OK to resume normal operation. Implementation resembles the
6516  * second-half of the igb_resume routine.
6517  */
6518 static void igb_io_resume(struct pci_dev *pdev)
6519 {
6520         struct net_device *netdev = pci_get_drvdata(pdev);
6521         struct igb_adapter *adapter = netdev_priv(netdev);
6522
6523         if (netif_running(netdev)) {
6524                 if (igb_up(adapter)) {
6525                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6526                         return;
6527                 }
6528         }
6529
6530         netif_device_attach(netdev);
6531
6532         /* let the f/w know that the h/w is now under the control of the
6533          * driver. */
6534         igb_get_hw_control(adapter);
6535 }
6536
6537 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6538                              u8 qsel)
6539 {
6540         u32 rar_low, rar_high;
6541         struct e1000_hw *hw = &adapter->hw;
6542
6543         /* HW expects these in little endian so we reverse the byte order
6544          * from network order (big endian) to little endian
6545          */
6546         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6547                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6548         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6549
6550         /* Indicate to hardware the Address is Valid. */
6551         rar_high |= E1000_RAH_AV;
6552
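             /* select the rx pool for this address; the 82575 encodes the
              * pool as a value while later MACs use a per-pool bit mask */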
6553         if (hw->mac.type == e1000_82575)
6554                 rar_high |= E1000_RAH_POOL_1 * qsel;
6555         else
6556                 rar_high |= E1000_RAH_POOL_1 << qsel;
6557
6558         wr32(E1000_RAL(index), rar_low);
6559         wrfl();
6560         wr32(E1000_RAH(index), rar_high);
6561         wrfl();
6562 }
6563
6564 static int igb_set_vf_mac(struct igb_adapter *adapter,
6565                           int vf, unsigned char *mac_addr)
6566 {
6567         struct e1000_hw *hw = &adapter->hw;
6568         /* VF MAC addresses start at the end of the receive addresses and
6569          * move towards the first, so a collision should not be possible */
6570         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6571
6572         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6573
6574         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6575
6576         return 0;
6577 }
6578
6579 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6580 {
6581         struct igb_adapter *adapter = netdev_priv(netdev);
6582         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6583                 return -EINVAL;
6584         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6585         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6586         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6587                                       " change effective.\n");
6588         if (test_bit(__IGB_DOWN, &adapter->state)) {
6589                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6590                          " but the PF device is not up.\n");
6591                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6592                          " attempting to use the VF device.\n");
6593         }
6594         return igb_set_vf_mac(adapter, vf, mac);
6595 }
6596
6597 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6598 {
6599         return -EOPNOTSUPP;
6600 }
6601
6602 static int igb_ndo_get_vf_config(struct net_device *netdev,
6603                                  int vf, struct ifla_vf_info *ivi)
6604 {
6605         struct igb_adapter *adapter = netdev_priv(netdev);
6606         if (vf >= adapter->vfs_allocated_count)
6607                 return -EINVAL;
6608         ivi->vf = vf;
6609         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6610         ivi->tx_rate = 0;
6611         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6612         ivi->qos = adapter->vf_data[vf].pf_qos;
6613         return 0;
6614 }
6615
6616 static void igb_vmm_control(struct igb_adapter *adapter)
6617 {
6618         struct e1000_hw *hw = &adapter->hw;
6619         u32 reg;
6620
6621         switch (hw->mac.type) {
6622         case e1000_82575:
6623         default:
6624                 /* replication is not supported for 82575 */
6625                 return;
6626         case e1000_82576:
6627                 /* notify HW that the MAC is adding vlan tags */
6628                 reg = rd32(E1000_DTXCTL);
6629                 reg |= E1000_DTXCTL_VLAN_ADDED;
6630                 wr32(E1000_DTXCTL, reg);
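                     /* fall through */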
6631         case e1000_82580:
6632                 /* enable replication vlan tag stripping */
6633                 reg = rd32(E1000_RPLOLR);
6634                 reg |= E1000_RPLOLR_STRVLAN;
6635                 wr32(E1000_RPLOLR, reg);
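                     /* fall through */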
6636         case e1000_i350:
6637                 /* none of the above registers are supported by i350 */
6638                 break;
6639         }
6640
6641         if (adapter->vfs_allocated_count) {
6642                 igb_vmdq_set_loopback_pf(hw, true);
6643                 igb_vmdq_set_replication_pf(hw, true);
6644                 igb_vmdq_set_anti_spoofing_pf(hw, true,
6645                                                 adapter->vfs_allocated_count);
6646         } else {
6647                 igb_vmdq_set_loopback_pf(hw, false);
6648                 igb_vmdq_set_replication_pf(hw, false);
6649         }
6650 }
6651
6652 /* igb_main.c */