Merge branch 'for-linus-dma-masks' of git://git.linaro.org/people/rmk/linux-arm
[cascardo/linux.git] / drivers / net / ethernet / intel / ixgbe / ixgbe_main.c
index b1dc844..0066f0a 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/ethtool.h>
 #include <linux/if.h>
 #include <linux/if_vlan.h>
+#include <linux/if_macvlan.h>
 #include <linux/if_bridge.h>
 #include <linux/prefetch.h>
 #include <scsi/fc/fc_fcoe.h>
@@ -132,7 +133,7 @@ static struct notifier_block dca_notifier = {
 static unsigned int max_vfs;
 module_param(max_vfs, uint, 0);
 MODULE_PARM_DESC(max_vfs,
-                "Maximum number of virtual functions to allocate per physical function - default is zero and maximum value is 63");
+                "Maximum number of virtual functions to allocate per physical function - default is zero and maximum value is 63. (Deprecated)");
 #endif /* CONFIG_PCI_IOV */
 
 static unsigned int allow_unsupported_sfp;
@@ -153,7 +154,6 @@ MODULE_VERSION(DRV_VERSION);
 static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
                                          u32 reg, u16 *value)
 {
-       int pos = 0;
        struct pci_dev *parent_dev;
        struct pci_bus *parent_bus;
 
@@ -165,11 +165,10 @@ static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
        if (!parent_dev)
                return -1;
 
-       pos = pci_find_capability(parent_dev, PCI_CAP_ID_EXP);
-       if (!pos)
+       if (!pci_is_pcie(parent_dev))
                return -1;
 
-       pci_read_config_word(parent_dev, pos + reg, value);
+       pcie_capability_read_word(parent_dev, reg, value);
        return 0;
 }
 
@@ -247,7 +246,7 @@ static void ixgbe_check_minimum_link(struct ixgbe_adapter *adapter,
                max_gts = 4 * width;
                break;
        case PCIE_SPEED_8_0GT:
-               /* 128b/130b encoding only reduces throughput by 1% */
+               /* 128b/130b encoding reduces throughput by less than 2% */
                max_gts = 8 * width;
                break;
        default:
@@ -265,7 +264,7 @@ static void ixgbe_check_minimum_link(struct ixgbe_adapter *adapter,
                   width,
                   (speed == PCIE_SPEED_2_5GT ? "20%" :
                    speed == PCIE_SPEED_5_0GT ? "20%" :
-                   speed == PCIE_SPEED_8_0GT ? "N/a" :
+                   speed == PCIE_SPEED_8_0GT ? "<2%" :
                    "Unknown"));
 
        if (max_gts < expected_gts) {
@@ -872,11 +871,18 @@ static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring)
 
 static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring)
 {
-       struct ixgbe_adapter *adapter = netdev_priv(ring->netdev);
-       struct ixgbe_hw *hw = &adapter->hw;
+       struct ixgbe_adapter *adapter;
+       struct ixgbe_hw *hw;
+       u32 head, tail;
 
-       u32 head = IXGBE_READ_REG(hw, IXGBE_TDH(ring->reg_idx));
-       u32 tail = IXGBE_READ_REG(hw, IXGBE_TDT(ring->reg_idx));
+       if (ring->l2_accel_priv)
+               adapter = ring->l2_accel_priv->real_adapter;
+       else
+               adapter = netdev_priv(ring->netdev);
+
+       hw = &adapter->hw;
+       head = IXGBE_READ_REG(hw, IXGBE_TDH(ring->reg_idx));
+       tail = IXGBE_READ_REG(hw, IXGBE_TDT(ring->reg_idx));
 
        if (head != tail)
                return (head < tail) ?
@@ -1585,7 +1591,7 @@ static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
 {
        struct ixgbe_adapter *adapter = q_vector->adapter;
 
-       if (ixgbe_qv_ll_polling(q_vector))
+       if (ixgbe_qv_busy_polling(q_vector))
                netif_receive_skb(skb);
        else if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL))
                napi_gro_receive(&q_vector->napi, skb);
@@ -2097,7 +2103,7 @@ static int ixgbe_low_latency_recv(struct napi_struct *napi)
 
        ixgbe_for_each_ring(ring, q_vector->rx) {
                found = ixgbe_clean_rx_irq(q_vector, ring, 4);
-#ifdef LL_EXTENDED_STATS
+#ifdef BP_EXTENDED_STATS
                if (found)
                        ring->stats.cleaned += found;
                else
@@ -3005,7 +3011,7 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
                struct ixgbe_q_vector *q_vector = ring->q_vector;
 
                if (q_vector)
-                       netif_set_xps_queue(adapter->netdev,
+                       netif_set_xps_queue(ring->netdev,
                                            &q_vector->affinity_mask,
                                            ring->queue_index);
        }
@@ -3395,7 +3401,7 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
 {
        struct ixgbe_hw *hw = &adapter->hw;
        int rss_i = adapter->ring_feature[RING_F_RSS].indices;
-       int p;
+       u16 pool;
 
        /* PSRTYPE must be initialized in non 82598 adapters */
        u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
@@ -3412,9 +3418,8 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
        else if (rss_i > 1)
                psrtype |= 1 << 29;
 
-       for (p = 0; p < adapter->num_rx_pools; p++)
-               IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(p)),
-                               psrtype);
+       for_each_set_bit(pool, &adapter->fwd_bitmask, 32)
+               IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype);
 }
 
 static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
@@ -3571,7 +3576,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
 {
        struct ixgbe_hw *hw = &adapter->hw;
        int i;
-       u32 rxctrl;
+       u32 rxctrl, rfctl;
 
        /* disable receives while setting up the descriptors */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
@@ -3580,6 +3585,13 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
        ixgbe_setup_psrtype(adapter);
        ixgbe_setup_rdrxctl(adapter);
 
+       /* RSC Setup */
+       rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
+       rfctl &= ~IXGBE_RFCTL_RSC_DIS;
+       if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED))
+               rfctl |= IXGBE_RFCTL_RSC_DIS;
+       IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
+
        /* Program registers for the distribution of queues */
        ixgbe_setup_mrqc(adapter);
 
@@ -3676,7 +3688,11 @@ static void ixgbe_vlan_strip_disable(struct ixgbe_adapter *adapter)
        case ixgbe_mac_82599EB:
        case ixgbe_mac_X540:
                for (i = 0; i < adapter->num_rx_queues; i++) {
-                       j = adapter->rx_ring[i]->reg_idx;
+                       struct ixgbe_ring *ring = adapter->rx_ring[i];
+
+                       if (ring->l2_accel_priv)
+                               continue;
+                       j = ring->reg_idx;
                        vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j));
                        vlnctrl &= ~IXGBE_RXDCTL_VME;
                        IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), vlnctrl);
@@ -3706,7 +3722,11 @@ static void ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter)
        case ixgbe_mac_82599EB:
        case ixgbe_mac_X540:
                for (i = 0; i < adapter->num_rx_queues; i++) {
-                       j = adapter->rx_ring[i]->reg_idx;
+                       struct ixgbe_ring *ring = adapter->rx_ring[i];
+
+                       if (ring->l2_accel_priv)
+                               continue;
+                       j = ring->reg_idx;
                        vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j));
                        vlnctrl |= IXGBE_RXDCTL_VME;
                        IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), vlnctrl);
@@ -3743,7 +3763,7 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev)
        unsigned int rar_entries = hw->mac.num_rar_entries - 1;
        int count = 0;
 
-       /* In SR-IOV mode significantly less RAR entries are available */
+       /* In SR-IOV/VMDQ modes significantly less RAR entries are available */
        if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
                rar_entries = IXGBE_MAX_PF_MACVLANS - 1;
 
@@ -3818,14 +3838,6 @@ void ixgbe_set_rx_mode(struct net_device *netdev)
                if (netdev->flags & IFF_ALLMULTI) {
                        fctrl |= IXGBE_FCTRL_MPE;
                        vmolr |= IXGBE_VMOLR_MPE;
-               } else {
-                       /*
-                        * Write addresses to the MTA, if the attempt fails
-                        * then we should just turn on promiscuous mode so
-                        * that we can at least receive multicast traffic
-                        */
-                       hw->mac.ops.update_mc_addr_list(hw, netdev);
-                       vmolr |= IXGBE_VMOLR_ROMPE;
                }
                ixgbe_vlan_filter_enable(adapter);
                hw->addr_ctrl.user_set_promisc = false;
@@ -3842,6 +3854,13 @@ void ixgbe_set_rx_mode(struct net_device *netdev)
                vmolr |= IXGBE_VMOLR_ROPE;
        }
 
+       /* Write addresses to the MTA, if the attempt fails
+        * then we should just turn on promiscuous mode so
+        * that we can at least receive multicast traffic
+        */
+       hw->mac.ops.update_mc_addr_list(hw, netdev);
+       vmolr |= IXGBE_VMOLR_ROMPE;
+
        if (adapter->num_vfs)
                ixgbe_restore_vf_multicasts(adapter);
 
@@ -3886,15 +3905,13 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
 {
        int q_idx;
 
-       local_bh_disable(); /* for ixgbe_qv_lock_napi() */
        for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) {
                napi_disable(&adapter->q_vector[q_idx]->napi);
-               while (!ixgbe_qv_lock_napi(adapter->q_vector[q_idx])) {
+               while (!ixgbe_qv_disable(adapter->q_vector[q_idx])) {
                        pr_info("QV %d locked\n", q_idx);
-                       mdelay(1);
+                       usleep_range(1000, 20000);
                }
        }
-       local_bh_enable();
 }
 
 #ifdef CONFIG_IXGBE_DCB
@@ -4111,6 +4128,228 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
        spin_unlock(&adapter->fdir_perfect_lock);
 }
 
+static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool,
+                                     struct ixgbe_adapter *adapter)
+{
+       struct ixgbe_hw *hw = &adapter->hw;
+       u32 vmolr;
+
+       /* No unicast promiscuous support for VMDQ devices. */
+       vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool));
+       vmolr |= (IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE);
+
+       /* clear the affected bit */
+       vmolr &= ~IXGBE_VMOLR_MPE;
+
+       if (dev->flags & IFF_ALLMULTI) {
+               vmolr |= IXGBE_VMOLR_MPE;
+       } else {
+               vmolr |= IXGBE_VMOLR_ROMPE;
+               hw->mac.ops.update_mc_addr_list(hw, dev);
+       }
+       ixgbe_write_uc_addr_list(adapter->netdev);
+       IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr);
+}
+
+static void ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
+                                u8 *addr, u16 pool)
+{
+       struct ixgbe_hw *hw = &adapter->hw;
+       unsigned int entry;
+
+       entry = hw->mac.num_rar_entries - pool;
+       hw->mac.ops.set_rar(hw, entry, addr, VMDQ_P(pool), IXGBE_RAH_AV);
+}
+
+static void ixgbe_fwd_psrtype(struct ixgbe_fwd_adapter *vadapter)
+{
+       struct ixgbe_adapter *adapter = vadapter->real_adapter;
+       int rss_i = adapter->num_rx_queues_per_pool;
+       struct ixgbe_hw *hw = &adapter->hw;
+       u16 pool = vadapter->pool;
+       u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
+                     IXGBE_PSRTYPE_UDPHDR |
+                     IXGBE_PSRTYPE_IPV4HDR |
+                     IXGBE_PSRTYPE_L2HDR |
+                     IXGBE_PSRTYPE_IPV6HDR;
+
+       if (hw->mac.type == ixgbe_mac_82598EB)
+               return;
+
+       if (rss_i > 3)
+               psrtype |= 2 << 29;
+       else if (rss_i > 1)
+               psrtype |= 1 << 29;
+
+       IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype);
+}
+
+/**
+ * ixgbe_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ **/
+static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
+{
+       struct device *dev = rx_ring->dev;
+       unsigned long size;
+       u16 i;
+
+       /* ring already cleared, nothing to do */
+       if (!rx_ring->rx_buffer_info)
+               return;
+
+       /* Free all the Rx ring sk_buffs */
+       for (i = 0; i < rx_ring->count; i++) {
+               struct ixgbe_rx_buffer *rx_buffer;
+
+               rx_buffer = &rx_ring->rx_buffer_info[i];
+               if (rx_buffer->skb) {
+                       struct sk_buff *skb = rx_buffer->skb;
+                       if (IXGBE_CB(skb)->page_released) {
+                               dma_unmap_page(dev,
+                                              IXGBE_CB(skb)->dma,
+                                              ixgbe_rx_bufsz(rx_ring),
+                                              DMA_FROM_DEVICE);
+                               IXGBE_CB(skb)->page_released = false;
+                       }
+                       dev_kfree_skb(skb);
+               }
+               rx_buffer->skb = NULL;
+               if (rx_buffer->dma)
+                       dma_unmap_page(dev, rx_buffer->dma,
+                                      ixgbe_rx_pg_size(rx_ring),
+                                      DMA_FROM_DEVICE);
+               rx_buffer->dma = 0;
+               if (rx_buffer->page)
+                       __free_pages(rx_buffer->page,
+                                    ixgbe_rx_pg_order(rx_ring));
+               rx_buffer->page = NULL;
+       }
+
+       size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
+       memset(rx_ring->rx_buffer_info, 0, size);
+
+       /* Zero out the descriptor ring */
+       memset(rx_ring->desc, 0, rx_ring->size);
+
+       rx_ring->next_to_alloc = 0;
+       rx_ring->next_to_clean = 0;
+       rx_ring->next_to_use = 0;
+}
+
+static void ixgbe_disable_fwd_ring(struct ixgbe_fwd_adapter *vadapter,
+                                  struct ixgbe_ring *rx_ring)
+{
+       struct ixgbe_adapter *adapter = vadapter->real_adapter;
+       int index = rx_ring->queue_index + vadapter->rx_base_queue;
+
+       /* shutdown specific queue receive and wait for dma to settle */
+       ixgbe_disable_rx_queue(adapter, rx_ring);
+       usleep_range(10000, 20000);
+       ixgbe_irq_disable_queues(adapter, ((u64)1 << index));
+       ixgbe_clean_rx_ring(rx_ring);
+       rx_ring->l2_accel_priv = NULL;
+}
+
+int ixgbe_fwd_ring_down(struct net_device *vdev,
+                       struct ixgbe_fwd_adapter *accel)
+{
+       struct ixgbe_adapter *adapter = accel->real_adapter;
+       unsigned int rxbase = accel->rx_base_queue;
+       unsigned int txbase = accel->tx_base_queue;
+       int i;
+
+       netif_tx_stop_all_queues(vdev);
+
+       for (i = 0; i < adapter->num_rx_queues_per_pool; i++) {
+               ixgbe_disable_fwd_ring(accel, adapter->rx_ring[rxbase + i]);
+               adapter->rx_ring[rxbase + i]->netdev = adapter->netdev;
+       }
+
+       for (i = 0; i < adapter->num_rx_queues_per_pool; i++) {
+               adapter->tx_ring[txbase + i]->l2_accel_priv = NULL;
+               adapter->tx_ring[txbase + i]->netdev = adapter->netdev;
+       }
+
+
+       return 0;
+}
+
+static int ixgbe_fwd_ring_up(struct net_device *vdev,
+                            struct ixgbe_fwd_adapter *accel)
+{
+       struct ixgbe_adapter *adapter = accel->real_adapter;
+       unsigned int rxbase, txbase, queues;
+       int i, baseq, err = 0;
+
+       if (!test_bit(accel->pool, &adapter->fwd_bitmask))
+               return 0;
+
+       baseq = accel->pool * adapter->num_rx_queues_per_pool;
+       netdev_dbg(vdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n",
+                  accel->pool, adapter->num_rx_pools,
+                  baseq, baseq + adapter->num_rx_queues_per_pool,
+                  adapter->fwd_bitmask);
+
+       accel->netdev = vdev;
+       accel->rx_base_queue = rxbase = baseq;
+       accel->tx_base_queue = txbase = baseq;
+
+       for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
+               ixgbe_disable_fwd_ring(accel, adapter->rx_ring[rxbase + i]);
+
+       for (i = 0; i < adapter->num_rx_queues_per_pool; i++) {
+               adapter->rx_ring[rxbase + i]->netdev = vdev;
+               adapter->rx_ring[rxbase + i]->l2_accel_priv = accel;
+               ixgbe_configure_rx_ring(adapter, adapter->rx_ring[rxbase + i]);
+       }
+
+       for (i = 0; i < adapter->num_rx_queues_per_pool; i++) {
+               adapter->tx_ring[txbase + i]->netdev = vdev;
+               adapter->tx_ring[txbase + i]->l2_accel_priv = accel;
+       }
+
+       queues = min_t(unsigned int,
+                      adapter->num_rx_queues_per_pool, vdev->num_tx_queues);
+       err = netif_set_real_num_tx_queues(vdev, queues);
+       if (err)
+               goto fwd_queue_err;
+
+       err = netif_set_real_num_rx_queues(vdev, queues);
+       if (err)
+               goto fwd_queue_err;
+
+       if (is_valid_ether_addr(vdev->dev_addr))
+               ixgbe_add_mac_filter(adapter, vdev->dev_addr, accel->pool);
+
+       ixgbe_fwd_psrtype(accel);
+       ixgbe_macvlan_set_rx_mode(vdev, accel->pool, adapter);
+       return err;
+fwd_queue_err:
+       ixgbe_fwd_ring_down(vdev, accel);
+       return err;
+}
+
+static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter)
+{
+       struct net_device *upper;
+       struct list_head *iter;
+       int err;
+
+       netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) {
+               if (netif_is_macvlan(upper)) {
+                       struct macvlan_dev *dfwd = netdev_priv(upper);
+                       struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
+
+                       if (dfwd->fwd_priv) {
+                               err = ixgbe_fwd_ring_up(upper, vadapter);
+                               if (err)
+                                       continue;
+                       }
+               }
+       }
+}
+
 static void ixgbe_configure(struct ixgbe_adapter *adapter)
 {
        struct ixgbe_hw *hw = &adapter->hw;
@@ -4162,6 +4401,7 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 #endif /* IXGBE_FCOE */
        ixgbe_configure_tx(adapter);
        ixgbe_configure_rx(adapter);
+       ixgbe_configure_dfwd(adapter);
 }
 
 static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw)
@@ -4315,6 +4555,8 @@ static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter)
 static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
 {
        struct ixgbe_hw *hw = &adapter->hw;
+       struct net_device *upper;
+       struct list_head *iter;
        int err;
        u32 ctrl_ext;
 
@@ -4358,6 +4600,16 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
        /* enable transmits */
        netif_tx_start_all_queues(adapter->netdev);
 
+       /* enable any upper devices */
+       netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) {
+               if (netif_is_macvlan(upper)) {
+                       struct macvlan_dev *vlan = netdev_priv(upper);
+
+                       if (vlan->fwd_priv)
+                               netif_tx_start_all_queues(upper);
+               }
+       }
+
        /* bring the link up in the watchdog, this could race with our first
         * link up interrupt but shouldn't be a problem */
        adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
@@ -4448,59 +4700,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter)
                ixgbe_ptp_reset(adapter);
 }
 
-/**
- * ixgbe_clean_rx_ring - Free Rx Buffers per Queue
- * @rx_ring: ring to free buffers from
- **/
-static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
-{
-       struct device *dev = rx_ring->dev;
-       unsigned long size;
-       u16 i;
-
-       /* ring already cleared, nothing to do */
-       if (!rx_ring->rx_buffer_info)
-               return;
-
-       /* Free all the Rx ring sk_buffs */
-       for (i = 0; i < rx_ring->count; i++) {
-               struct ixgbe_rx_buffer *rx_buffer;
-
-               rx_buffer = &rx_ring->rx_buffer_info[i];
-               if (rx_buffer->skb) {
-                       struct sk_buff *skb = rx_buffer->skb;
-                       if (IXGBE_CB(skb)->page_released) {
-                               dma_unmap_page(dev,
-                                              IXGBE_CB(skb)->dma,
-                                              ixgbe_rx_bufsz(rx_ring),
-                                              DMA_FROM_DEVICE);
-                               IXGBE_CB(skb)->page_released = false;
-                       }
-                       dev_kfree_skb(skb);
-               }
-               rx_buffer->skb = NULL;
-               if (rx_buffer->dma)
-                       dma_unmap_page(dev, rx_buffer->dma,
-                                      ixgbe_rx_pg_size(rx_ring),
-                                      DMA_FROM_DEVICE);
-               rx_buffer->dma = 0;
-               if (rx_buffer->page)
-                       __free_pages(rx_buffer->page,
-                                    ixgbe_rx_pg_order(rx_ring));
-               rx_buffer->page = NULL;
-       }
-
-       size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
-       memset(rx_ring->rx_buffer_info, 0, size);
-
-       /* Zero out the descriptor ring */
-       memset(rx_ring->desc, 0, rx_ring->size);
-
-       rx_ring->next_to_alloc = 0;
-       rx_ring->next_to_clean = 0;
-       rx_ring->next_to_use = 0;
-}
-
 /**
  * ixgbe_clean_tx_ring - Free Tx Buffers
  * @tx_ring: ring to be cleaned
@@ -4578,6 +4777,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
        struct ixgbe_hw *hw = &adapter->hw;
+       struct net_device *upper;
+       struct list_head *iter;
        u32 rxctrl;
        int i;
 
@@ -4601,6 +4802,19 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
        netif_carrier_off(netdev);
        netif_tx_disable(netdev);
 
+       /* disable any upper devices */
+       netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) {
+               if (netif_is_macvlan(upper)) {
+                       struct macvlan_dev *vlan = netdev_priv(upper);
+
+                       if (vlan->fwd_priv) {
+                               netif_tx_stop_all_queues(upper);
+                               netif_carrier_off(upper);
+                               netif_tx_disable(upper);
+                       }
+               }
+       }
+
        ixgbe_irq_disable(adapter);
 
        ixgbe_napi_disable_all(adapter);
@@ -4809,11 +5023,20 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
        hw->fc.disable_fc_autoneg = ixgbe_device_supports_autoneg_fc(hw);
 
 #ifdef CONFIG_PCI_IOV
+       if (max_vfs > 0)
+               e_dev_warn("Enabling SR-IOV VFs using the max_vfs module parameter is deprecated - please use the pci sysfs interface instead.\n");
+
        /* assign number of SR-IOV VFs */
-       if (hw->mac.type != ixgbe_mac_82598EB)
-               adapter->num_vfs = (max_vfs > 63) ? 0 : max_vfs;
+       if (hw->mac.type != ixgbe_mac_82598EB) {
+               if (max_vfs > 63) {
+                       adapter->num_vfs = 0;
+                       e_dev_warn("max_vfs parameter out of range. Not assigning any SR-IOV VFs\n");
+               } else {
+                       adapter->num_vfs = max_vfs;
+               }
+       }
+#endif /* CONFIG_PCI_IOV */
 
-#endif
        /* enable itr by default in dynamic mode */
        adapter->rx_itr_setting = 1;
        adapter->tx_itr_setting = 1;
@@ -4831,6 +5054,8 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
                return -EIO;
        }
 
+       /* PF holds first pool slot */
+       set_bit(0, &adapter->fwd_bitmask);
        set_bit(__IXGBE_DOWN, &adapter->state);
 
        return 0;
@@ -5136,7 +5361,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
 static int ixgbe_open(struct net_device *netdev)
 {
        struct ixgbe_adapter *adapter = netdev_priv(netdev);
-       int err;
+       int err, queues;
 
        /* disallow open during test */
        if (test_bit(__IXGBE_TESTING, &adapter->state))
@@ -5161,16 +5386,21 @@ static int ixgbe_open(struct net_device *netdev)
                goto err_req_irq;
 
        /* Notify the stack of the actual queue counts. */
-       err = netif_set_real_num_tx_queues(netdev,
-                                          adapter->num_rx_pools > 1 ? 1 :
-                                          adapter->num_tx_queues);
+       if (adapter->num_rx_pools > 1)
+               queues = adapter->num_rx_queues_per_pool;
+       else
+               queues = adapter->num_tx_queues;
+
+       err = netif_set_real_num_tx_queues(netdev, queues);
        if (err)
                goto err_set_queues;
 
-
-       err = netif_set_real_num_rx_queues(netdev,
-                                          adapter->num_rx_pools > 1 ? 1 :
-                                          adapter->num_rx_queues);
+       if (adapter->num_rx_pools > 1 &&
+           adapter->num_rx_queues > IXGBE_MAX_L2A_QUEUES)
+               queues = IXGBE_MAX_L2A_QUEUES;
+       else
+               queues = adapter->num_rx_queues;
+       err = netif_set_real_num_rx_queues(netdev, queues);
        if (err)
                goto err_set_queues;
 
@@ -5993,8 +6223,16 @@ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter)
        adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
 
        speed = hw->phy.autoneg_advertised;
-       if ((!speed) && (hw->mac.ops.get_link_capabilities))
+       if ((!speed) && (hw->mac.ops.get_link_capabilities)) {
                hw->mac.ops.get_link_capabilities(hw, &speed, &autoneg);
+
+               /* setup the highest link when no autoneg */
+               if (!autoneg) {
+                       if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+                               speed = IXGBE_LINK_SPEED_10GB_FULL;
+               }
+       }
+
        if (hw->mac.ops.setup_link)
                hw->mac.ops.setup_link(hw, speed, true);
 
@@ -6752,8 +6990,9 @@ out_drop:
        return NETDEV_TX_OK;
 }
 
-static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb,
-                                   struct net_device *netdev)
+static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb,
+                                     struct net_device *netdev,
+                                     struct ixgbe_ring *ring)
 {
        struct ixgbe_adapter *adapter = netdev_priv(netdev);
        struct ixgbe_ring *tx_ring;
@@ -6769,10 +7008,17 @@ static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb,
                skb_set_tail_pointer(skb, 17);
        }
 
-       tx_ring = adapter->tx_ring[skb->queue_mapping];
+       tx_ring = ring ? ring : adapter->tx_ring[skb->queue_mapping];
+
        return ixgbe_xmit_frame_ring(skb, adapter, tx_ring);
 }
 
+static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb,
+                                   struct net_device *netdev)
+{
+       return __ixgbe_xmit_frame(skb, netdev, NULL);
+}
+
 /**
  * ixgbe_set_mac - Change the Ethernet Address of the NIC
  * @netdev: network interface device structure
@@ -7029,6 +7275,7 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
 {
        struct ixgbe_adapter *adapter = netdev_priv(dev);
        struct ixgbe_hw *hw = &adapter->hw;
+       bool pools;
 
        /* Hardware supports up to 8 traffic classes */
        if (tc > adapter->dcb_cfg.num_tcs.pg_tcs ||
@@ -7036,6 +7283,10 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
             tc < MAX_TRAFFIC_CLASS))
                return -EINVAL;
 
+       pools = (find_first_zero_bit(&adapter->fwd_bitmask, 32) > 1);
+       if (tc && pools && adapter->num_rx_pools > IXGBE_MAX_DCBMACVLANS)
+               return -EBUSY;
+
        /* Hardware has to reinitialize queues and interrupts to
         * match packet buffer alignment. Unfortunately, the
         * hardware is not flexible enough to do this dynamically.
@@ -7290,6 +7541,104 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
        return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode);
 }
 
+static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
+{
+       struct ixgbe_fwd_adapter *fwd_adapter = NULL;
+       struct ixgbe_adapter *adapter = netdev_priv(pdev);
+       unsigned int limit;
+       int pool, err;
+
+#ifdef CONFIG_RPS
+       if (vdev->num_rx_queues != vdev->num_tx_queues) {
+               netdev_info(pdev, "%s: Only supports a single queue count for TX and RX\n",
+                           vdev->name);
+               return ERR_PTR(-EINVAL);
+       }
+#endif
+       /* Check for hardware restriction on number of rx/tx queues */
+       if (vdev->num_tx_queues > IXGBE_MAX_L2A_QUEUES ||
+           vdev->num_tx_queues == IXGBE_BAD_L2A_QUEUE) {
+               netdev_info(pdev,
+                           "%s: Supports RX/TX Queue counts 1,2, and 4\n",
+                           pdev->name);
+               return ERR_PTR(-EINVAL);
+       }
+
+       if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
+             adapter->num_rx_pools > IXGBE_MAX_DCBMACVLANS - 1) ||
+           (adapter->num_rx_pools > IXGBE_MAX_MACVLANS))
+               return ERR_PTR(-EBUSY);
+
+       fwd_adapter = kcalloc(1, sizeof(struct ixgbe_fwd_adapter), GFP_KERNEL);
+       if (!fwd_adapter)
+               return ERR_PTR(-ENOMEM);
+
+       pool = find_first_zero_bit(&adapter->fwd_bitmask, 32);
+       adapter->num_rx_pools++;
+       set_bit(pool, &adapter->fwd_bitmask);
+       limit = find_last_bit(&adapter->fwd_bitmask, 32);
+
+       /* Enable VMDq flag so device will be set in VM mode */
+       adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | IXGBE_FLAG_SRIOV_ENABLED;
+       adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
+       adapter->ring_feature[RING_F_RSS].limit = vdev->num_tx_queues;
+
+       /* Force reinit of ring allocation with VMDQ enabled */
+       err = ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev));
+       if (err)
+               goto fwd_add_err;
+       fwd_adapter->pool = pool;
+       fwd_adapter->real_adapter = adapter;
+       err = ixgbe_fwd_ring_up(vdev, fwd_adapter);
+       if (err)
+               goto fwd_add_err;
+       netif_tx_start_all_queues(vdev);
+       return fwd_adapter;
+fwd_add_err:
+       /* unwind counter and free adapter struct */
+       netdev_info(pdev,
+                   "%s: dfwd hardware acceleration failed\n", vdev->name);
+       clear_bit(pool, &adapter->fwd_bitmask);
+       adapter->num_rx_pools--;
+       kfree(fwd_adapter);
+       return ERR_PTR(err);
+}
+
+static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
+{
+       struct ixgbe_fwd_adapter *fwd_adapter = priv;
+       struct ixgbe_adapter *adapter = fwd_adapter->real_adapter;
+       unsigned int limit;
+
+       clear_bit(fwd_adapter->pool, &adapter->fwd_bitmask);
+       adapter->num_rx_pools--;
+
+       limit = find_last_bit(&adapter->fwd_bitmask, 32);
+       adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
+       ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter);
+       ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev));
+       netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n",
+                  fwd_adapter->pool, adapter->num_rx_pools,
+                  fwd_adapter->rx_base_queue,
+                  fwd_adapter->rx_base_queue + adapter->num_rx_queues_per_pool,
+                  adapter->fwd_bitmask);
+       kfree(fwd_adapter);
+}
+
+static netdev_tx_t ixgbe_fwd_xmit(struct sk_buff *skb,
+                                 struct net_device *dev,
+                                 void *priv)
+{
+       struct ixgbe_fwd_adapter *fwd_adapter = priv;
+       unsigned int queue;
+       struct ixgbe_ring *tx_ring;
+
+       queue = skb->queue_mapping + fwd_adapter->tx_base_queue;
+       tx_ring = fwd_adapter->real_adapter->tx_ring[queue];
+
+       return __ixgbe_xmit_frame(skb, dev, tx_ring);
+}
+
 static const struct net_device_ops ixgbe_netdev_ops = {
        .ndo_open               = ixgbe_open,
        .ndo_stop               = ixgbe_close,
@@ -7334,6 +7683,9 @@ static const struct net_device_ops ixgbe_netdev_ops = {
        .ndo_fdb_add            = ixgbe_ndo_fdb_add,
        .ndo_bridge_setlink     = ixgbe_ndo_bridge_setlink,
        .ndo_bridge_getlink     = ixgbe_ndo_bridge_getlink,
+       .ndo_dfwd_add_station   = ixgbe_fwd_add,
+       .ndo_dfwd_del_station   = ixgbe_fwd_del,
+       .ndo_dfwd_start_xmit    = ixgbe_fwd_xmit,
 };
 
 /**
@@ -7347,19 +7699,16 @@ static const struct net_device_ops ixgbe_netdev_ops = {
  **/
 static inline int ixgbe_enumerate_functions(struct ixgbe_adapter *adapter)
 {
-       struct ixgbe_hw *hw = &adapter->hw;
        struct list_head *entry;
        int physfns = 0;
 
-       /* Some cards can not use the generic count PCIe functions method, and
-        * so must be hardcoded to the correct value.
+       /* Some cards can not use the generic count PCIe functions method,
+        * because they are behind a parent switch, so we hardcode these with
+        * the correct number of functions.
         */
-       switch (hw->device_id) {
-       case IXGBE_DEV_ID_82599_SFP_SF_QP:
-       case IXGBE_DEV_ID_82599_QSFP_SF_QP:
+       if (ixgbe_pcie_from_parent(&adapter->hw)) {
                physfns = 4;
-               break;
-       default:
+       } else {
                list_for_each(entry, &adapter->pdev->bus_list) {
                        struct pci_dev *pdev =
                                list_entry(entry, struct pci_dev, bus_list);
@@ -7633,7 +7982,8 @@ skip_sriov:
                           NETIF_F_TSO |
                           NETIF_F_TSO6 |
                           NETIF_F_RXHASH |
-                          NETIF_F_RXCSUM;
+                          NETIF_F_RXCSUM |
+                          NETIF_F_HW_L2FW_DOFFLOAD;
 
        netdev->hw_features = netdev->features;
 
@@ -7739,29 +8089,6 @@ skip_sriov:
        if (ixgbe_pcie_from_parent(hw))
                ixgbe_get_parent_bus_info(adapter);
 
-       /* print bus type/speed/width info */
-       e_dev_info("(PCI Express:%s:%s) %pM\n",
-                  (hw->bus.speed == ixgbe_bus_speed_8000 ? "8.0GT/s" :
-                   hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" :
-                   hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" :
-                   "Unknown"),
-                  (hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" :
-                   hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" :
-                   hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" :
-                   "Unknown"),
-                  netdev->dev_addr);
-
-       err = ixgbe_read_pba_string_generic(hw, part_str, IXGBE_PBANUM_LENGTH);
-       if (err)
-               strncpy(part_str, "Unknown", IXGBE_PBANUM_LENGTH);
-       if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
-               e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
-                          hw->mac.type, hw->phy.type, hw->phy.sfp_type,
-                          part_str);
-       else
-               e_dev_info("MAC: %d, PHY: %d, PBA No: %s\n",
-                          hw->mac.type, hw->phy.type, part_str);
-
        /* calculate the expected PCIe bandwidth required for optimal
         * performance. Note that some older parts will never have enough
         * bandwidth due to being older generation PCIe parts. We clamp these
@@ -7777,6 +8104,19 @@ skip_sriov:
        }
        ixgbe_check_minimum_link(adapter, expected_gts);
 
+       err = ixgbe_read_pba_string_generic(hw, part_str, IXGBE_PBANUM_LENGTH);
+       if (err)
+               strncpy(part_str, "Unknown", IXGBE_PBANUM_LENGTH);
+       if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
+               e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
+                          hw->mac.type, hw->phy.type, hw->phy.sfp_type,
+                          part_str);
+       else
+               e_dev_info("MAC: %d, PHY: %d, PBA No: %s\n",
+                          hw->mac.type, hw->phy.type, part_str);
+
+       e_dev_info("%pM\n", netdev->dev_addr);
+
        /* reset the hardware with the new settings */
        err = hw->mac.ops.start_hw(hw);
        if (err == IXGBE_ERR_EEPROM_VERSION) {