Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
author David S. Miller <davem@davemloft.net>
Mon, 12 Sep 2016 22:52:44 +0000 (15:52 -0700)
committer David S. Miller <davem@davemloft.net>
Mon, 12 Sep 2016 22:52:44 +0000 (15:52 -0700)
Conflicts:
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/phy/Kconfig

All conflicts were cases of overlapping commits.

Signed-off-by: David S. Miller <davem@davemloft.net>
40 files changed:
MAINTAINERS
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/qp.c
drivers/net/bonding/bond_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/cavium/thunder/nic.h
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mediatek/mtk_eth_soc.h
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/smsc/smsc911x.c
drivers/net/ethernet/synopsys/dwc_eth_qos.c
drivers/net/phy/Kconfig
drivers/net/vxlan.c
include/linux/mlx5/mlx5_ifc.h
include/linux/netdevice.h
net/bridge/br_input.c
net/core/dev.c
net/core/flow_dissector.c
net/ipv4/fib_frontend.c
net/ipv4/fib_semantics.c
net/ipv4/xfrm4_policy.c
net/ipv6/ip6_tunnel.c
net/ipv6/xfrm6_policy.c
net/kcm/kcmsock.c
net/xfrm/xfrm_policy.c

diff --combined MAINTAINERS
@@@ -636,15 -636,6 +636,15 @@@ F:       drivers/tty/serial/altera_jtaguart.
  F:    include/linux/altera_uart.h
  F:    include/linux/altera_jtaguart.h
  
 +AMAZON ETHERNET DRIVERS
 +M:    Netanel Belgazal <netanel@annapurnalabs.com>
 +R:    Saeed Bishara <saeed@annapurnalabs.com>
 +R:    Zorik Machulsky <zorik@annapurnalabs.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +F:    Documentation/networking/ena.txt
 +F:    drivers/net/ethernet/amazon/
 +
  AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
  M:    Tom Lendacky <thomas.lendacky@amd.com>
  M:    Gary Hook <gary.hook@amd.com>
@@@ -807,6 -798,7 +807,7 @@@ M: Laura Abbott <labbott@redhat.com
  M:    Sumit Semwal <sumit.semwal@linaro.org>
  L:    devel@driverdev.osuosl.org
  S:    Supported
+ F:    Documentation/devicetree/bindings/staging/ion/
  F:    drivers/staging/android/ion
  F:    drivers/staging/android/uapi/ion.h
  F:    drivers/staging/android/uapi/ion_test.h
@@@ -1632,7 -1624,7 +1633,7 @@@ N:      rockchi
  
  ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
  M:    Kukjin Kim <kgene@kernel.org>
- M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
+ M:    Krzysztof Kozlowski <krzk@kernel.org>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  S:    Maintained
@@@ -1652,7 -1644,6 +1653,6 @@@ F:      drivers/*/*s3c64xx
  F:    drivers/*/*s5pv210*
  F:    drivers/memory/samsung/*
  F:    drivers/soc/samsung/*
- F:    drivers/spi/spi-s3c*
  F:    Documentation/arm/Samsung/
  F:    Documentation/devicetree/bindings/arm/samsung/
  F:    Documentation/devicetree/bindings/sram/samsung-sram.txt
@@@ -1840,6 -1831,7 +1840,7 @@@ T:      git git://git.kernel.org/pub/scm/lin
  ARM/UNIPHIER ARCHITECTURE
  M:    Masahiro Yamada <yamada.masahiro@socionext.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
  S:    Maintained
  F:    arch/arm/boot/dts/uniphier*
  F:    arch/arm/include/asm/hardware/cache-uniphier.h
@@@ -2493,7 -2485,7 +2494,7 @@@ F:      include/net/bluetooth
  BONDING DRIVER
  M:    Jay Vosburgh <j.vosburgh@gmail.com>
  M:    Veaceslav Falico <vfalico@gmail.com>
- M:    Andy Gospodarek <gospo@cumulusnetworks.com>
+ M:    Andy Gospodarek <andy@greyhouse.net>
  L:    netdev@vger.kernel.org
  W:    http://sourceforge.net/projects/bonding/
  S:    Supported
@@@ -3256,7 -3248,7 +3257,7 @@@ F:      kernel/cpuset.
  CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
  M:    Johannes Weiner <hannes@cmpxchg.org>
  M:    Michal Hocko <mhocko@kernel.org>
- M:    Vladimir Davydov <vdavydov@virtuozzo.com>
+ M:    Vladimir Davydov <vdavydov.dev@gmail.com>
  L:    cgroups@vger.kernel.org
  L:    linux-mm@kvack.org
  S:    Maintained
@@@ -3277,7 -3269,7 +3278,7 @@@ S:      Maintaine
  F:    drivers/net/wan/cosa*
  
  CPMAC ETHERNET DRIVER
- M:    Florian Fainelli <florian@openwrt.org>
+ M:    Florian Fainelli <f.fainelli@gmail.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/ethernet/ti/cpmac.c
@@@ -7473,7 -7465,8 +7474,8 @@@ F:      Documentation/devicetree/bindings/so
  F:    sound/soc/codecs/max9860.*
  
  MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
- M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
+ M:    Krzysztof Kozlowski <krzk@kernel.org>
+ M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    linux-pm@vger.kernel.org
  S:    Supported
  F:    drivers/power/max14577_charger.c
@@@ -7489,7 -7482,8 +7491,8 @@@ F:      include/dt-bindings/*/*max77802.
  
  MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
  M:    Chanwoo Choi <cw00.choi@samsung.com>
- M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
+ M:    Krzysztof Kozlowski <krzk@kernel.org>
+ M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    linux-kernel@vger.kernel.org
  S:    Supported
  F:    drivers/*/max14577*.c
@@@ -9255,7 -9249,7 +9258,7 @@@ F:      drivers/pinctrl/sh-pfc
  
  PIN CONTROLLER - SAMSUNG
  M:    Tomasz Figa <tomasz.figa@gmail.com>
- M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
+ M:    Krzysztof Kozlowski <krzk@kernel.org>
  M:    Sylwester Nawrocki <s.nawrocki@samsung.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
@@@ -9696,12 -9690,6 +9699,12 @@@ T:    git git://git.kernel.org/pub/scm/lin
  S:    Supported
  F:    drivers/net/wireless/ath/ath10k/
  
 +QUALCOMM EMAC GIGABIT ETHERNET DRIVER
 +M:    Timur Tabi <timur@codeaurora.org>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +F:    drivers/net/ethernet/qualcomm/emac/
 +
  QUALCOMM HEXAGON ARCHITECTURE
  M:    Richard Kuo <rkuo@codeaurora.org>
  L:    linux-hexagon@vger.kernel.org
@@@ -9957,7 -9945,6 +9960,7 @@@ F:      net/rfkill
  
  RHASHTABLE
  M:    Thomas Graf <tgraf@suug.ch>
 +M:    Herbert Xu <herbert@gondor.apana.org.au>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    lib/rhashtable.c
@@@ -10195,7 -10182,7 +10198,7 @@@ S:   Maintaine
  F:    drivers/platform/x86/samsung-laptop.c
  
  SAMSUNG AUDIO (ASoC) DRIVERS
- M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
+ M:    Krzysztof Kozlowski <krzk@kernel.org>
  M:    Sangbeom Kim <sbkim73@samsung.com>
  M:    Sylwester Nawrocki <s.nawrocki@samsung.com>
  L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
@@@ -10210,7 -10197,8 +10213,8 @@@ F:   drivers/video/fbdev/s3c-fb.
  
  SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
  M:    Sangbeom Kim <sbkim73@samsung.com>
- M:    Krzysztof Kozlowski <k.kozlowski@samsung.com>
+ M:    Krzysztof Kozlowski <krzk@kernel.org>
+ M:    Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
  L:    linux-kernel@vger.kernel.org
  L:    linux-samsung-soc@vger.kernel.org
  S:    Supported
@@@ -10269,6 -10257,17 +10273,17 @@@ S: Supporte
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  F:    drivers/clk/samsung/
  
+ SAMSUNG SPI DRIVERS
+ M:    Kukjin Kim <kgene@kernel.org>
+ M:    Krzysztof Kozlowski <krzk@kernel.org>
+ M:    Andi Shyti <andi.shyti@samsung.com>
+ L:    linux-spi@vger.kernel.org
+ L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
+ S:    Maintained
+ F:    Documentation/devicetree/bindings/spi/spi-samsung.txt
+ F:    drivers/spi/spi-s3c*
+ F:    include/linux/platform_data/spi-s3c64xx.h
  SAMSUNG SXGBE DRIVERS
  M:    Byungho An <bh74.an@samsung.com>
  M:    Girish K S <ks.giri@samsung.com>
@@@ -11248,12 -11247,8 +11263,8 @@@ S:  Odd Fixe
  F:    drivers/staging/vt665?/
  
  STAGING - WILC1000 WIFI DRIVER
- M:    Johnny Kim <johnny.kim@atmel.com>
- M:    Austin Shin <austin.shin@atmel.com>
- M:    Chris Park <chris.park@atmel.com>
- M:    Tony Cho <tony.cho@atmel.com>
- M:    Glen Lee <glen.lee@atmel.com>
- M:    Leo Kim <leo.kim@atmel.com>
+ M:    Aditya Shankar <aditya.shankar@microchip.com>
+ M:    Ganesh Krishna <ganesh.krishna@microchip.com>
  L:    linux-wireless@vger.kernel.org
  S:    Supported
  F:    drivers/staging/wilc1000/
@@@ -12285,7 -12280,6 +12296,7 @@@ F:   drivers/net/usb/smsc75xx.
  
  USB SMSC95XX ETHERNET DRIVER
  M:    Steve Glendinning <steve.glendinning@shawell.net>
 +M:    Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/usb/smsc95xx.*
diff --combined drivers/infiniband/hw/mlx5/cq.c
@@@ -553,12 -553,6 +553,6 @@@ repoll
                 * from the table.
                 */
                mqp = __mlx5_qp_lookup(dev->mdev, qpn);
-               if (unlikely(!mqp)) {
-                       mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
-                                    cq->mcq.cqn, qpn);
-                       return -EINVAL;
-               }
                *cur_qp = to_mibqp(mqp);
        }
  
                read_lock(&dev->mdev->priv.mkey_table.lock);
                mmkey = __mlx5_mr_lookup(dev->mdev,
                                         mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
-               if (unlikely(!mmkey)) {
-                       read_unlock(&dev->mdev->priv.mkey_table.lock);
-                       mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
-                                    cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
-                       return -EINVAL;
-               }
                mr = to_mibmr(mmkey);
                get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
                mr->sig->sig_err_exists = true;
@@@ -676,7 -663,6 +663,6 @@@ int mlx5_ib_poll_cq(struct ib_cq *ibcq
        unsigned long flags;
        int soft_polled = 0;
        int npolled;
-       int err = 0;
  
        spin_lock_irqsave(&cq->lock, flags);
        if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                soft_polled = poll_soft_wc(cq, num_entries, wc);
  
        for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
-               err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
-               if (err)
+               if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
                        break;
        }
  
  out:
        spin_unlock_irqrestore(&cq->lock, flags);
  
-       if (err == 0 || err == -EAGAIN)
-               return soft_polled + npolled;
-       else
-               return err;
+       return soft_polled + npolled;
  }
  
  int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
@@@ -747,16 -729,14 +729,16 @@@ static int alloc_cq_buf(struct mlx5_ib_
  
  static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
                          struct ib_ucontext *context, struct mlx5_ib_cq *cq,
 -                        int entries, struct mlx5_create_cq_mbox_in **cqb,
 +                        int entries, u32 **cqb,
                          int *cqe_size, int *index, int *inlen)
  {
        struct mlx5_ib_create_cq ucmd;
        size_t ucmdlen;
        int page_shift;
 +      __be64 *pas;
        int npages;
        int ncont;
 +      void *cqc;
        int err;
  
        ucmdlen =
        mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
                    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
  
 -      *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
 +      *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 +               MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
        *cqb = mlx5_vzalloc(*inlen);
        if (!*cqb) {
                err = -ENOMEM;
                goto err_db;
        }
 -      mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
 -      (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 +
 +      pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
 +      mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
 +
 +      cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
 +      MLX5_SET(cqc, cqc, log_page_size,
 +               page_shift - MLX5_ADAPTER_PAGE_SHIFT);
  
        *index = to_mucontext(context)->uuari.uars[0].index;
  
@@@ -842,10 -816,9 +824,10 @@@ static void init_cq_buf(struct mlx5_ib_
  
  static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
                            int entries, int cqe_size,
 -                          struct mlx5_create_cq_mbox_in **cqb,
 -                          int *index, int *inlen)
 +                          u32 **cqb, int *index, int *inlen)
  {
 +      __be64 *pas;
 +      void *cqc;
        int err;
  
        err = mlx5_db_alloc(dev->mdev, &cq->db);
  
        init_cq_buf(cq, &cq->buf);
  
 -      *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
 +      *inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 +               MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
        *cqb = mlx5_vzalloc(*inlen);
        if (!*cqb) {
                err = -ENOMEM;
                goto err_buf;
        }
 -      mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
  
 -      (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 +      pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
 +      mlx5_fill_page_array(&cq->buf.buf, pas);
 +
 +      cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
 +      MLX5_SET(cqc, cqc, log_page_size,
 +               cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 +
        *index = dev->mdev->priv.uuari.uars[0].index;
  
        return 0;
@@@ -910,12 -877,11 +892,12 @@@ struct ib_cq *mlx5_ib_create_cq(struct 
  {
        int entries = attr->cqe;
        int vector = attr->comp_vector;
 -      struct mlx5_create_cq_mbox_in *cqb = NULL;
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_cq *cq;
        int uninitialized_var(index);
        int uninitialized_var(inlen);
 +      u32 *cqb = NULL;
 +      void *cqc;
        int cqe_size;
        unsigned int irqn;
        int eqn;
                INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
        }
  
 -      cq->cqe_size = cqe_size;
 -      cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
 -
 -      if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
 -              cqb->ctx.cqe_sz_flags |= (1 << 1);
 -
 -      cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
        err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
        if (err)
                goto err_cqb;
  
 -      cqb->ctx.c_eqn = cpu_to_be16(eqn);
 -      cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
 +      cq->cqe_size = cqe_size;
 +
 +      cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
 +      MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
 +      MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
 +      MLX5_SET(cqc, cqc, uar_page, index);
 +      MLX5_SET(cqc, cqc, c_eqn, eqn);
 +      MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
 +      if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
 +              MLX5_SET(cqc, cqc, oi, 1);
  
        err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
        if (err)
@@@ -1105,15 -1070,27 +1087,15 @@@ void mlx5_ib_cq_clean(struct mlx5_ib_c
  
  int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
  {
 -      struct mlx5_modify_cq_mbox_in *in;
        struct mlx5_ib_dev *dev = to_mdev(cq->device);
        struct mlx5_ib_cq *mcq = to_mcq(cq);
        int err;
 -      u32 fsel;
  
        if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
                return -ENOSYS;
  
 -      in = kzalloc(sizeof(*in), GFP_KERNEL);
 -      if (!in)
 -              return -ENOMEM;
 -
 -      in->cqn = cpu_to_be32(mcq->mcq.cqn);
 -      fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
 -      in->ctx.cq_period = cpu_to_be16(cq_period);
 -      in->ctx.cq_max_count = cpu_to_be16(cq_count);
 -      in->field_select = cpu_to_be32(fsel);
 -      err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in));
 -      kfree(in);
 -
 +      err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
 +                                           cq_period, cq_count);
        if (err)
                mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
  
@@@ -1246,11 -1223,9 +1228,11 @@@ int mlx5_ib_resize_cq(struct ib_cq *ibc
  {
        struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
        struct mlx5_ib_cq *cq = to_mcq(ibcq);
 -      struct mlx5_modify_cq_mbox_in *in;
 +      void *cqc;
 +      u32 *in;
        int err;
        int npas;
 +      __be64 *pas;
        int page_shift;
        int inlen;
        int uninitialized_var(cqe_size);
        if (err)
                goto ex;
  
 -      inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
 +      inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
 +              MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
 +
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto ex_resize;
        }
  
 +      pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
        if (udata)
                mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
 -                                   in->pas, 0);
 +                                   pas, 0);
        else
 -              mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
 -
 -      in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
 -                                     MLX5_MODIFY_CQ_MASK_PG_OFFSET |
 -                                     MLX5_MODIFY_CQ_MASK_PG_SIZE);
 -      in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 -      in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
 -      in->ctx.page_offset = 0;
 -      in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
 -      in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
 -      in->cqn = cpu_to_be32(cq->mcq.cqn);
 +              mlx5_fill_page_array(&cq->resize_buf->buf, pas);
 +
 +      MLX5_SET(modify_cq_in, in,
 +               modify_field_select_resize_field_select.resize_field_select.resize_field_select,
 +               MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
 +               MLX5_MODIFY_CQ_MASK_PG_OFFSET |
 +               MLX5_MODIFY_CQ_MASK_PG_SIZE);
 +
 +      cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
 +
 +      MLX5_SET(cqc, cqc, log_page_size,
 +               page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 +      MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
 +      MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
 +
 +      MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
 +      MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);
  
        err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
        if (err)
diff --combined drivers/infiniband/hw/mlx5/main.c
@@@ -232,19 -232,23 +232,19 @@@ static int set_roce_addr(struct ib_devi
                         const union ib_gid *gid,
                         const struct ib_gid_attr *attr)
  {
 -      struct mlx5_ib_dev *dev = to_mdev(device);
 -      u32  in[MLX5_ST_SZ_DW(set_roce_address_in)];
 -      u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
 +      struct mlx5_ib_dev *dev = to_mdev(device);
 +      u32  in[MLX5_ST_SZ_DW(set_roce_address_in)]  = {0};
 +      u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
        void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
  
        if (ll != IB_LINK_LAYER_ETHERNET)
                return -EINVAL;
  
 -      memset(in, 0, sizeof(in));
 -
        ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
  
        MLX5_SET(set_roce_address_in, in, roce_address_index, index);
        MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
 -
 -      memset(out, 0, sizeof(out));
        return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
  }
  
@@@ -747,7 -751,8 +747,7 @@@ static int mlx5_query_hca_port(struct i
                                     &props->active_width);
        if (err)
                goto out;
 -      err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
 -                                       port);
 +      err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
        if (err)
                goto out;
  
@@@ -1844,6 -1849,7 +1844,7 @@@ static struct ib_flow *mlx5_ib_create_f
                                           int domain)
  {
        struct mlx5_ib_dev *dev = to_mdev(qp->device);
+       struct mlx5_ib_qp *mqp = to_mqp(qp);
        struct mlx5_ib_flow_handler *handler = NULL;
        struct mlx5_flow_destination *dst = NULL;
        struct mlx5_ib_flow_prio *ft_prio;
        }
  
        dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
+       if (mqp->flags & MLX5_IB_QP_RSS)
+               dst->tir_num = mqp->rss_qp.tirn;
+       else
+               dst->tir_num = mqp->raw_packet_qp.rq.tirn;
  
        if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
                if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)  {
diff --combined drivers/infiniband/hw/mlx5/mlx5_ib.h
@@@ -402,6 -402,7 +402,7 @@@ enum mlx5_ib_qp_flags 
        /* QP uses 1 as its source QP number */
        MLX5_IB_QP_SQPN_QP1                     = 1 << 6,
        MLX5_IB_QP_CAP_SCATTER_FCS              = 1 << 7,
+       MLX5_IB_QP_RSS                          = 1 << 8,
  };
  
  struct mlx5_umr_wr {
@@@ -504,7 -505,7 +505,7 @@@ struct mlx5_ib_mr 
        int                     umred;
        int                     npages;
        struct mlx5_ib_dev     *dev;
 -      struct mlx5_create_mkey_mbox_out out;
 +      u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
        struct mlx5_core_sig_ctx    *sig;
        int                     live;
        void                    *descs_alloc;
diff --combined drivers/infiniband/hw/mlx5/qp.c
@@@ -726,7 -726,7 +726,7 @@@ err_umem
  static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                          struct mlx5_ib_qp *qp, struct ib_udata *udata,
                          struct ib_qp_init_attr *attr,
 -                        struct mlx5_create_qp_mbox_in **in,
 +                        u32 **in,
                          struct mlx5_ib_create_qp_resp *resp, int *inlen,
                          struct mlx5_ib_qp_base *base)
  {
        u32 offset = 0;
        int uuarn;
        int ncont = 0;
 +      __be64 *pas;
 +      void *qpc;
        int err;
  
        err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
                ubuffer->umem = NULL;
        }
  
 -      *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
 +      *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
 +               MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
        *in = mlx5_vzalloc(*inlen);
        if (!*in) {
                err = -ENOMEM;
                goto err_umem;
        }
 +
 +      pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
        if (ubuffer->umem)
 -              mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
 -                                   (*in)->pas, 0);
 -      (*in)->ctx.log_pg_sz_remote_qpn =
 -              cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
 -      (*in)->ctx.params2 = cpu_to_be32(offset << 6);
 +              mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
 +
 +      qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
  
 -      (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
 +      MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 +      MLX5_SET(qpc, qpc, page_offset, offset);
 +
 +      MLX5_SET(qpc, qpc, uar_page, uar_index);
        resp->uuar_index = uuarn;
        qp->uuarn = uuarn;
  
@@@ -863,13 -857,12 +863,13 @@@ static void destroy_qp_user(struct ib_p
  static int create_kernel_qp(struct mlx5_ib_dev *dev,
                            struct ib_qp_init_attr *init_attr,
                            struct mlx5_ib_qp *qp,
 -                          struct mlx5_create_qp_mbox_in **in, int *inlen,
 +                          u32 **in, int *inlen,
                            struct mlx5_ib_qp_base *base)
  {
        enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
        struct mlx5_uuar_info *uuari;
        int uar_index;
 +      void *qpc;
        int uuarn;
        int err;
  
        }
  
        qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
 -      *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
 +      *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
 +               MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
        *in = mlx5_vzalloc(*inlen);
        if (!*in) {
                err = -ENOMEM;
                goto err_buf;
        }
 -      (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
 -      (*in)->ctx.log_pg_sz_remote_qpn =
 -              cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
 +
 +      qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
 +      MLX5_SET(qpc, qpc, uar_page, uar_index);
 +      MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 +
        /* Set "fast registration enabled" for all kernel QPs */
 -      (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
 -      (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
 +      MLX5_SET(qpc, qpc, fre, 1);
 +      MLX5_SET(qpc, qpc, rlky, 1);
  
        if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
 -              (*in)->ctx.deth_sqpn = cpu_to_be32(1);
 +              MLX5_SET(qpc, qpc, deth_sqpn, 1);
                qp->flags |= MLX5_IB_QP_SQPN_QP1;
        }
  
 -      mlx5_fill_page_array(&qp->buf, (*in)->pas);
 +      mlx5_fill_page_array(&qp->buf,
 +                           (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
  
        err = mlx5_db_alloc(dev->mdev, &qp->db);
        if (err) {
@@@ -985,15 -974,15 +985,15 @@@ static void destroy_qp_kernel(struct ml
        free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
  }
  
 -static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
 +static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
  {
        if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
            (attr->qp_type == IB_QPT_XRC_INI))
 -              return cpu_to_be32(MLX5_SRQ_RQ);
 +              return MLX5_SRQ_RQ;
        else if (!qp->has_rq)
 -              return cpu_to_be32(MLX5_ZERO_LEN_RQ);
 +              return MLX5_ZERO_LEN_RQ;
        else
 -              return cpu_to_be32(MLX5_NON_ZERO_RQ);
 +              return MLX5_NON_ZERO_RQ;
  }
  
  static int is_connected(enum ib_qp_type qp_type)
  static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
                                    struct mlx5_ib_sq *sq, u32 tdn)
  {
 -      u32 in[MLX5_ST_SZ_DW(create_tis_in)];
 +      u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
        void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
  
 -      memset(in, 0, sizeof(in));
 -
        MLX5_SET(tisc, tisc, transport_domain, tdn);
 -
        return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
  }
  
@@@ -1199,7 -1191,7 +1199,7 @@@ static void destroy_raw_packet_qp_tir(s
  }
  
  static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 -                              struct mlx5_create_qp_mbox_in *in,
 +                              u32 *in,
                                struct ib_pd *pd)
  {
        struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
@@@ -1457,6 -1449,7 +1457,7 @@@ create_tir
        kvfree(in);
        /* qpn is reserved for that QP */
        qp->trans_qp.base.mqp.qpn = 0;
+       qp->flags |= MLX5_IB_QP_RSS;
        return 0;
  
  err:
@@@ -1469,18 -1462,18 +1470,18 @@@ static int create_qp_common(struct mlx5
                            struct ib_udata *udata, struct mlx5_ib_qp *qp)
  {
        struct mlx5_ib_resources *devr = &dev->devr;
 +      int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
        struct mlx5_core_dev *mdev = dev->mdev;
 -      struct mlx5_ib_qp_base *base;
        struct mlx5_ib_create_qp_resp resp;
 -      struct mlx5_create_qp_mbox_in *in;
 -      struct mlx5_ib_create_qp ucmd;
        struct mlx5_ib_cq *send_cq;
        struct mlx5_ib_cq *recv_cq;
        unsigned long flags;
 -      int inlen = sizeof(*in);
 -      int err;
        u32 uidx = MLX5_IB_DEFAULT_UIDX;
 +      struct mlx5_ib_create_qp ucmd;
 +      struct mlx5_ib_qp_base *base;
        void *qpc;
 +      u32 *in;
 +      int err;
  
        base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
               &qp->raw_packet_qp.rq.base :
                if (err)
                        return err;
        } else {
 -              in = mlx5_vzalloc(sizeof(*in));
 +              in = mlx5_vzalloc(inlen);
                if (!in)
                        return -ENOMEM;
  
        if (is_sqp(init_attr->qp_type))
                qp->port = init_attr->port_num;
  
 -      in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
 -                                  MLX5_QP_PM_MIGRATED << 11);
 +      qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 +
 +      MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
 +      MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
  
        if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
 -              in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
 +              MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
        else
 -              in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
 +              MLX5_SET(qpc, qpc, latency_sensitive, 1);
 +
  
        if (qp->wq_sig)
 -              in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
 +              MLX5_SET(qpc, qpc, wq_signature, 1);
  
        if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
 -              in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
 +              MLX5_SET(qpc, qpc, block_lb_mc, 1);
  
        if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
 -              in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
 +              MLX5_SET(qpc, qpc, cd_master, 1);
        if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
 -              in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
 +              MLX5_SET(qpc, qpc, cd_slave_send, 1);
        if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
 -              in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
 +              MLX5_SET(qpc, qpc, cd_slave_receive, 1);
  
        if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
                int rcqe_sz;
                scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
  
                if (rcqe_sz == 128)
 -                      in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
 +                      MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
                else
 -                      in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
 +                      MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
  
                if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
                        if (scqe_sz == 128)
 -                              in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
 +                              MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
                        else
 -                              in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
 +                              MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
                }
        }
  
        if (qp->rq.wqe_cnt) {
 -              in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
 -              in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
 +              MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
 +              MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
        }
  
 -      in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
 +      MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
  
        if (qp->sq.wqe_cnt)
 -              in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
 +              MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
        else
 -              in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
 +              MLX5_SET(qpc, qpc, no_sq, 1);
  
        /* Set default resources */
        switch (init_attr->qp_type) {
        case IB_QPT_XRC_TGT:
 -              in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
 -              in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
 -              in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
 -              in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
 +              MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
 +              MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
 +              MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
 +              MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn);
                break;
        case IB_QPT_XRC_INI:
 -              in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
 -              in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
 -              in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
 +              MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
 +              MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
 +              MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
                break;
        default:
                if (init_attr->srq) {
 -                      in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
 -                      in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
 +                      MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
 +                      MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
                } else {
 -                      in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
 -                      in->ctx.rq_type_srqn |=
 -                              cpu_to_be32(to_msrq(devr->s1)->msrq.srqn);
 +                      MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
 +                      MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
                }
        }
  
        if (init_attr->send_cq)
 -              in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
 +              MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn);
  
        if (init_attr->recv_cq)
 -              in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
 +              MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn);
  
 -      in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
 +      MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
  
 -      if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
 -              qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 -              /* 0xffffff means we ask to work with cqe version 0 */
 +      /* 0xffffff means we ask to work with cqe version 0 */
 +      if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
                MLX5_SET(qpc, qpc, user_index, uidx);
 -      }
 +
        /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
        if (init_attr->qp_type == IB_QPT_UD &&
            (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
 -              qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
                MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
                qp->flags |= MLX5_IB_QP_LSO;
        }
@@@ -1868,6 -1861,7 +1869,6 @@@ static void destroy_qp_common(struct ml
  {
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
 -      struct mlx5_modify_qp_mbox_in *in;
        unsigned long flags;
        int err;
  
               &qp->raw_packet_qp.rq.base :
               &qp->trans_qp.base;
  
 -      in = kzalloc(sizeof(*in), GFP_KERNEL);
 -      if (!in)
 -              return;
 -
        if (qp->state != IB_QPS_RESET) {
                if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
                        mlx5_ib_qp_disable_pagefaults(qp);
                        err = mlx5_core_qp_modify(dev->mdev,
 -                                                MLX5_CMD_OP_2RST_QP, in, 0,
 -                                                &base->mqp);
 +                                                MLX5_CMD_OP_2RST_QP, 0,
 +                                                NULL, &base->mqp);
                } else {
                        err = modify_raw_packet_qp(dev, qp,
                                                   MLX5_CMD_OP_2RST_QP);
                                     base->mqp.qpn);
        }
  
 -      kfree(in);
 -
        if (qp->create_type == MLX5_QP_KERNEL)
                destroy_qp_kernel(dev, qp);
        else if (qp->create_type == MLX5_QP_USER)
@@@ -2512,6 -2512,7 +2513,6 @@@ static int __mlx5_ib_modify_qp(struct i
        struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
        struct mlx5_ib_cq *send_cq, *recv_cq;
        struct mlx5_qp_context *context;
 -      struct mlx5_modify_qp_mbox_in *in;
        struct mlx5_ib_pd *pd;
        enum mlx5_qp_state mlx5_cur, mlx5_new;
        enum mlx5_qp_optpar optpar;
        int err;
        u16 op;
  
 -      in = kzalloc(sizeof(*in), GFP_KERNEL);
 -      if (!in)
 +      context = kzalloc(sizeof(*context), GFP_KERNEL);
 +      if (!context)
                return -ENOMEM;
  
 -      context = &in->ctx;
        err = to_mlx5_st(ibqp->qp_type);
        if (err < 0) {
                mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
        op = optab[mlx5_cur][mlx5_new];
        optpar = ib_mask_to_mlx5_opt(attr_mask);
        optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
 -      in->optparam = cpu_to_be32(optpar);
  
        if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
                err = modify_raw_packet_qp(dev, qp, op);
        else
 -              err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
 +              err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
                                          &base->mqp);
        if (err)
                goto out;
        }
  
  out:
 -      kfree(in);
 +      kfree(context);
        return err;
  }
  
@@@ -2966,7 -2969,7 +2967,7 @@@ static void set_reg_umr_seg(struct mlx5
  
        memset(umr, 0, sizeof(*umr));
  
 -      if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
 +      if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
                /* KLMs take twice the size of MTTs */
                ndescs *= 2;
  
@@@ -3109,9 -3112,9 +3110,9 @@@ static void set_reg_mkey_seg(struct mlx
  
        memset(seg, 0, sizeof(*seg));
  
 -      if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
 +      if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
                seg->log2_page_size = ilog2(mr->ibmr.page_size);
 -      else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
 +      else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
                /* KLMs take twice the size of MTTs */
                ndescs *= 2;
  
@@@ -3452,7 -3455,7 +3453,7 @@@ static void set_sig_mkey_segment(struc
        memset(seg, 0, sizeof(*seg));
  
        seg->flags = get_umr_flags(wr->access_flags) |
 -                                 MLX5_ACCESS_MODE_KLM;
 +                                 MLX5_MKC_ACCESS_MODE_KLMS;
        seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
        seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
                                    MLX5_MKEY_BSF_EN | pdn);
@@@ -3656,12 -3659,8 +3657,8 @@@ static int begin_wqe(struct mlx5_ib_qp 
                     struct ib_send_wr *wr, unsigned *idx,
                     int *size, int nreq)
  {
-       int err = 0;
-       if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
-               err = -ENOMEM;
-               return err;
-       }
+       if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+               return -ENOMEM;
  
        *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
        *seg = mlx5_get_send_wqe(qp, *idx);
        *seg += sizeof(**ctrl);
        *size = sizeof(**ctrl) / 16;
  
-       return err;
+       return 0;
  }
  
  static void finish_wqe(struct mlx5_ib_qp *qp,
@@@ -3756,7 -3755,7 +3753,7 @@@ int mlx5_ib_post_send(struct ib_qp *ibq
                num_sge = wr->num_sge;
                if (unlikely(num_sge > qp->sq.max_gs)) {
                        mlx5_ib_warn(dev, "\n");
-                       err = -ENOMEM;
+                       err = -EINVAL;
                        *bad_wr = wr;
                        goto out;
                }
@@@ -4318,24 -4317,21 +4315,24 @@@ static int query_raw_packet_qp_state(st
  static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
                         struct ib_qp_attr *qp_attr)
  {
 -      struct mlx5_query_qp_mbox_out *outb;
 +      int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
        struct mlx5_qp_context *context;
        int mlx5_state;
 +      u32 *outb;
        int err = 0;
  
 -      outb = kzalloc(sizeof(*outb), GFP_KERNEL);
 +      outb = kzalloc(outlen, GFP_KERNEL);
        if (!outb)
                return -ENOMEM;
  
 -      context = &outb->ctx;
        err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
 -                               sizeof(*outb));
 +                               outlen);
        if (err)
                goto out;
  
 +      /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
 +      context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc);
 +
        mlx5_state = be32_to_cpu(context->flags) >> 28;
  
        qp->state                    = to_ib_qp_state(mlx5_state);
diff --combined drivers/net/bonding/bond_main.c
@@@ -1341,9 -1341,10 +1341,10 @@@ int bond_enslave(struct net_device *bon
                            slave_dev->name);
        }
  
-       /* already enslaved */
-       if (slave_dev->flags & IFF_SLAVE) {
-               netdev_dbg(bond_dev, "Error: Device was already enslaved\n");
+       /* already in-use? */
+       if (netdev_is_rx_handler_busy(slave_dev)) {
+               netdev_err(bond_dev,
+                          "Error: Device is in use and cannot be enslaved\n");
                return -EBUSY;
        }
  
@@@ -4627,7 -4628,7 +4628,7 @@@ static int bond_init(struct net_device 
  
        netdev_dbg(bond_dev, "Begin bond_init\n");
  
 -      bond->wq = create_singlethread_workqueue(bond_dev->name);
 +      bond->wq = alloc_ordered_workqueue(bond_dev->name, WQ_MEM_RECLAIM);
        if (!bond->wq)
                return -ENOMEM;
  
diff --combined drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@@ -772,6 -772,11 +772,11 @@@ void bnx2x_fw_dump_lvl(struct bnx2x *bp
                (bp->common.bc_ver & 0xff00) >> 8,
                (bp->common.bc_ver & 0xff));
  
+       if (pci_channel_offline(bp->pdev)) {
+               BNX2X_ERR("Cannot dump MCP info while in PCI error\n");
+               return;
+       }
        val = REG_RD(bp, MCP_REG_MCPR_CPU_PROGRAM_COUNTER);
        if (val == REG_RD(bp, MCP_REG_MCPR_CPU_PROGRAM_COUNTER))
                BNX2X_ERR("%s" "MCP PC at 0x%x\n", lvl, val);
@@@ -9415,10 -9420,16 +9420,16 @@@ unload_error
        /* Release IRQs */
        bnx2x_free_irq(bp);
  
-       /* Reset the chip */
-       rc = bnx2x_reset_hw(bp, reset_code);
-       if (rc)
-               BNX2X_ERR("HW_RESET failed\n");
+       /* Reset the chip, unless the PCI function is offline. If we reach
+        * this point following PCI error handling, it means the device is
+        * really in a bad state and we're about to remove it, so resetting
+        * the chip is not a good idea.
+        */
+       if (!pci_channel_offline(bp->pdev)) {
+               rc = bnx2x_reset_hw(bp, reset_code);
+               if (rc)
+                       BNX2X_ERR("HW_RESET failed\n");
+       }
  
        /* Report UNLOAD_DONE to MCP */
        bnx2x_send_unload_done(bp, keep_link);
@@@ -12560,10 -12571,8 +12571,10 @@@ static int bnx2x_init_mcast_macs_list(s
                kcalloc(mc_count, sizeof(*mc_mac), GFP_ATOMIC);
        struct netdev_hw_addr *ha;
  
 -      if (!mc_mac)
 +      if (!mc_mac) {
 +              BNX2X_ERR("Failed to allocate mc MAC list\n");
                return -ENOMEM;
 +      }
  
        INIT_LIST_HEAD(&p->mcast_list);
  
@@@ -12634,7 -12643,7 +12645,7 @@@ static int bnx2x_set_uc_list(struct bnx
                                 BNX2X_UC_LIST_MAC, &ramrod_flags);
  }
  
 -static int bnx2x_set_mc_list(struct bnx2x *bp)
 +static int bnx2x_set_mc_list_e1x(struct bnx2x *bp)
  {
        struct net_device *dev = bp->dev;
        struct bnx2x_mcast_ramrod_params rparam = {NULL};
        /* then, configure a new MACs list */
        if (netdev_mc_count(dev)) {
                rc = bnx2x_init_mcast_macs_list(bp, &rparam);
 -              if (rc) {
 -                      BNX2X_ERR("Failed to create multicast MACs list: %d\n",
 -                                rc);
 +              if (rc)
                        return rc;
 -              }
  
                /* Now add the new MACs */
                rc = bnx2x_config_mcast(bp, &rparam,
        return rc;
  }
  
 +static int bnx2x_set_mc_list(struct bnx2x *bp)
 +{
 +      struct bnx2x_mcast_ramrod_params rparam = {NULL};
 +      struct net_device *dev = bp->dev;
 +      int rc = 0;
 +
 +      /* On older adapters, we need to flush and re-add filters */
 +      if (CHIP_IS_E1x(bp))
 +              return bnx2x_set_mc_list_e1x(bp);
 +
 +      rparam.mcast_obj = &bp->mcast_obj;
 +
 +      if (netdev_mc_count(dev)) {
 +              rc = bnx2x_init_mcast_macs_list(bp, &rparam);
 +              if (rc)
 +                      return rc;
 +
 +              /* Override the currently configured set of mc filters */
 +              rc = bnx2x_config_mcast(bp, &rparam,
 +                                      BNX2X_MCAST_CMD_SET);
 +              if (rc < 0)
 +                      BNX2X_ERR("Failed to set a new multicast configuration: %d\n",
 +                                rc);
 +
 +              bnx2x_free_mcast_macs_list(&rparam);
 +      } else {
 +              /* If no mc addresses are required, flush the configuration */
 +              rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
 +              if (rc)
 +                      BNX2X_ERR("Failed to clear multicast configuration %d\n",
 +                                rc);
 +      }
 +
 +      return rc;
 +}
 +
  /* If bp->state is OPEN, should be called with netif_addr_lock_bh() */
  static void bnx2x_set_rx_mode(struct net_device *dev)
  {
@@@ -13238,22 -13214,13 +13249,22 @@@ static int bnx2x_init_dev(struct bnx2x 
                NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO |
                NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX;
        if (!chip_is_e1x) {
 -              dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL |
 -                                  NETIF_F_GSO_IPXIP4;
 +              dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM |
 +                                  NETIF_F_GSO_IPXIP4 |
 +                                  NETIF_F_GSO_UDP_TUNNEL |
 +                                  NETIF_F_GSO_UDP_TUNNEL_CSUM |
 +                                  NETIF_F_GSO_PARTIAL;
 +
                dev->hw_enc_features =
                        NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
                        NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 |
                        NETIF_F_GSO_IPXIP4 |
 -                      NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL;
 +                      NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM |
 +                      NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM |
 +                      NETIF_F_GSO_PARTIAL;
 +
 +              dev->gso_partial_features = NETIF_F_GSO_GRE_CSUM |
 +                                          NETIF_F_GSO_UDP_TUNNEL_CSUM;
        }
  
        dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
diff --combined drivers/net/ethernet/cadence/macb.c
@@@ -541,14 -541,6 +541,14 @@@ static void macb_tx_unmap(struct macb *
        }
  }
  
 +static inline void macb_set_addr(struct macb_dma_desc *desc, dma_addr_t addr)
 +{
 +      desc->addr = (u32)addr;
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +      desc->addrh = (u32)(addr >> 32);
 +#endif
 +}
 +
  static void macb_tx_error_task(struct work_struct *work)
  {
        struct macb_queue       *queue = container_of(work, struct macb_queue,
  
        /* Set end of TX queue */
        desc = macb_tx_desc(queue, 0);
 -      desc->addr = 0;
 +      macb_set_addr(desc, 0);
        desc->ctrl = MACB_BIT(TX_USED);
  
        /* Make descriptor updates visible to hardware */
        wmb();
  
        /* Reinitialize the TX desc queue */
 -      queue_writel(queue, TBQP, queue->tx_ring_dma);
 +      queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +      queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
 +#endif
        /* Make TX ring reflect state of hardware */
        queue->tx_head = 0;
        queue->tx_tail = 0;
@@@ -761,7 -750,7 +761,7 @@@ static void gem_rx_refill(struct macb *
  
                        if (entry == RX_RING_SIZE - 1)
                                paddr |= MACB_BIT(RX_WRAP);
 -                      bp->rx_ring[entry].addr = paddr;
 +                      macb_set_addr(&(bp->rx_ring[entry]), paddr);
                        bp->rx_ring[entry].ctrl = 0;
  
                        /* properly align Ethernet header */
@@@ -809,9 -798,7 +809,9 @@@ static int gem_rx(struct macb *bp, int 
        int                     count = 0;
  
        while (count < budget) {
 -              u32 addr, ctrl;
 +              u32 ctrl;
 +              dma_addr_t addr;
 +              bool rxused;
  
                entry = macb_rx_ring_wrap(bp->rx_tail);
                desc = &bp->rx_ring[entry];
                /* Make hw descriptor updates visible to CPU */
                rmb();
  
 -              addr = desc->addr;
 +              rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false;
 +              addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +              addr |= ((u64)(desc->addrh) << 32);
 +#endif
                ctrl = desc->ctrl;
  
 -              if (!(addr & MACB_BIT(RX_USED)))
 +              if (!rxused)
                        break;
  
                bp->rx_tail++;
                netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len);
  
                skb_put(skb, len);
 -              addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr));
                dma_unmap_single(&bp->pdev->dev, addr,
                                 bp->rx_buffer_size, DMA_FROM_DEVICE);
  
@@@ -1315,7 -1299,7 +1315,7 @@@ static unsigned int macb_tx_map(struct 
                        ctrl |= MACB_BIT(TX_WRAP);
  
                /* Set TX buffer descriptor */
 -              desc->addr = tx_skb->mapping;
 +              macb_set_addr(desc, tx_skb->mapping);
                /* desc->addr must be visible to hardware before clearing
                 * 'TX_USED' bit in desc->ctrl.
                 */
@@@ -1339,6 -1323,24 +1339,24 @@@ dma_error
        return 0;
  }
  
+ static inline int macb_clear_csum(struct sk_buff *skb)
+ {
+       /* no change for packets without checksum offloading */
+       if (skb->ip_summed != CHECKSUM_PARTIAL)
+               return 0;
+       /* make sure we can modify the header */
+       if (unlikely(skb_cow_head(skb, 0)))
+               return -1;
+       /* initialize checksum field
+        * This is required - at least for Zynq, which otherwise calculates
+        * wrong UDP header checksums for UDP packets with UDP data len <=2
+        */
+       *(__sum16 *)(skb_checksum_start(skb) + skb->csum_offset) = 0;
+       return 0;
+ }
  static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
  {
        u16 queue_index = skb_get_queue_mapping(skb);
                return NETDEV_TX_BUSY;
        }
  
+       if (macb_clear_csum(skb)) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
        /* Map socket buffer for DMA transfer */
        if (!macb_tx_map(bp, queue, skb)) {
                dev_kfree_skb_any(skb);
@@@ -1438,9 -1445,6 +1461,9 @@@ static void gem_free_rx_buffers(struct 
  
                desc = &bp->rx_ring[i];
                addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +              addr |= ((u64)(desc->addrh) << 32);
 +#endif
                dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size,
                                 DMA_FROM_DEVICE);
                dev_kfree_skb_any(skb);
@@@ -1566,7 -1570,7 +1589,7 @@@ static void gem_init_rings(struct macb 
  
        for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
                for (i = 0; i < TX_RING_SIZE; i++) {
 -                      queue->tx_ring[i].addr = 0;
 +                      macb_set_addr(&(queue->tx_ring[i]), 0);
                        queue->tx_ring[i].ctrl = MACB_BIT(TX_USED);
                }
                queue->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
@@@ -1713,10 -1717,6 +1736,10 @@@ static void macb_configure_dma(struct m
                        dmacfg |= GEM_BIT(TXCOEN);
                else
                        dmacfg &= ~GEM_BIT(TXCOEN);
 +
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +              dmacfg |= GEM_BIT(ADDR64);
 +#endif
                netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
                           dmacfg);
                gem_writel(bp, DMACFG, dmacfg);
@@@ -1762,15 -1762,9 +1785,15 @@@ static void macb_init_hw(struct macb *b
        macb_configure_dma(bp);
  
        /* Initialize TX and RX buffers */
 -      macb_writel(bp, RBQP, bp->rx_ring_dma);
 +      macb_writel(bp, RBQP, (u32)(bp->rx_ring_dma));
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +      macb_writel(bp, RBQPH, (u32)(bp->rx_ring_dma >> 32));
 +#endif
        for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 -              queue_writel(queue, TBQP, queue->tx_ring_dma);
 +              queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +              queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
 +#endif
  
                /* Enable interrupts */
                queue_writel(queue, IER,
@@@ -2332,8 -2326,7 +2355,8 @@@ static void macb_probe_queues(void __io
  }
  
  static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
 -                       struct clk **hclk, struct clk **tx_clk)
 +                       struct clk **hclk, struct clk **tx_clk,
 +                       struct clk **rx_clk)
  {
        int err;
  
        if (IS_ERR(*tx_clk))
                *tx_clk = NULL;
  
 +      *rx_clk = devm_clk_get(&pdev->dev, "rx_clk");
 +      if (IS_ERR(*rx_clk))
 +              *rx_clk = NULL;
 +
        err = clk_prepare_enable(*pclk);
        if (err) {
                dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
                goto err_disable_hclk;
        }
  
 +      err = clk_prepare_enable(*rx_clk);
 +      if (err) {
 +              dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err);
 +              goto err_disable_txclk;
 +      }
 +
        return 0;
  
 +err_disable_txclk:
 +      clk_disable_unprepare(*tx_clk);
 +
  err_disable_hclk:
        clk_disable_unprepare(*hclk);
  
@@@ -2422,9 -2402,6 +2445,9 @@@ static int macb_init(struct platform_de
                        queue->IDR  = GEM_IDR(hw_q - 1);
                        queue->IMR  = GEM_IMR(hw_q - 1);
                        queue->TBQP = GEM_TBQP(hw_q - 1);
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +                      queue->TBQPH = GEM_TBQPH(hw_q -1);
 +#endif
                } else {
                        /* queue0 uses legacy registers */
                        queue->ISR  = MACB_ISR;
                        queue->IDR  = MACB_IDR;
                        queue->IMR  = MACB_IMR;
                        queue->TBQP = MACB_TBQP;
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +                      queue->TBQPH = MACB_TBQPH;
 +#endif
                }
  
                /* get irq: here we use the linux queue index, not the hardware
@@@ -2777,14 -2751,12 +2800,14 @@@ static const struct net_device_ops at91
  };
  
  static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk,
 -                            struct clk **hclk, struct clk **tx_clk)
 +                            struct clk **hclk, struct clk **tx_clk,
 +                            struct clk **rx_clk)
  {
        int err;
  
        *hclk = NULL;
        *tx_clk = NULL;
 +      *rx_clk = NULL;
  
        *pclk = devm_clk_get(&pdev->dev, "ether_clk");
        if (IS_ERR(*pclk))
@@@ -2908,13 -2880,13 +2931,13 @@@ MODULE_DEVICE_TABLE(of, macb_dt_ids)
  static int macb_probe(struct platform_device *pdev)
  {
        int (*clk_init)(struct platform_device *, struct clk **,
 -                      struct clk **, struct clk **)
 +                      struct clk **, struct clk **, struct clk **)
                                              = macb_clk_init;
        int (*init)(struct platform_device *) = macb_init;
        struct device_node *np = pdev->dev.of_node;
        struct device_node *phy_node;
        const struct macb_config *macb_config = NULL;
 -      struct clk *pclk, *hclk = NULL, *tx_clk = NULL;
 +      struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL;
        unsigned int queue_mask, num_queues;
        struct macb_platform_data *pdata;
        bool native_io;
                }
        }
  
 -      err = clk_init(pdev, &pclk, &hclk, &tx_clk);
 +      err = clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk);
        if (err)
                return err;
  
        bp->pclk = pclk;
        bp->hclk = hclk;
        bp->tx_clk = tx_clk;
 +      bp->rx_clk = rx_clk;
        if (macb_config)
                bp->jumbo_max_len = macb_config->jumbo_max_len;
  
                bp->wol |= MACB_WOL_HAS_MAGIC_PACKET;
        device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET);
  
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +      if (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1)) > GEM_DBW32)
 +              dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
 +#endif
 +
        spin_lock_init(&bp->lock);
  
        /* setup capabilities */
        dev->irq = platform_get_irq(pdev, 0);
        if (dev->irq < 0) {
                err = dev->irq;
 -              goto err_disable_clocks;
 +              goto err_out_free_netdev;
        }
  
        mac = of_get_mac_address(np);
@@@ -3077,7 -3043,6 +3100,7 @@@ err_disable_clocks
        clk_disable_unprepare(tx_clk);
        clk_disable_unprepare(hclk);
        clk_disable_unprepare(pclk);
 +      clk_disable_unprepare(rx_clk);
  
        return err;
  }
@@@ -3104,7 -3069,6 +3127,7 @@@ static int macb_remove(struct platform_
                clk_disable_unprepare(bp->tx_clk);
                clk_disable_unprepare(bp->hclk);
                clk_disable_unprepare(bp->pclk);
 +              clk_disable_unprepare(bp->rx_clk);
                free_netdev(dev);
        }
  
@@@ -3128,7 -3092,6 +3151,7 @@@ static int __maybe_unused macb_suspend(
                clk_disable_unprepare(bp->tx_clk);
                clk_disable_unprepare(bp->hclk);
                clk_disable_unprepare(bp->pclk);
 +              clk_disable_unprepare(bp->rx_clk);
        }
  
        return 0;
@@@ -3148,7 -3111,6 +3171,7 @@@ static int __maybe_unused macb_resume(s
                clk_prepare_enable(bp->pclk);
                clk_prepare_enable(bp->hclk);
                clk_prepare_enable(bp->tx_clk);
 +              clk_prepare_enable(bp->rx_clk);
        }
  
        netif_device_attach(netdev);
  #define       PCI_DEVICE_ID_THUNDER_NIC_VF            0xA034
  #define       PCI_DEVICE_ID_THUNDER_BGX               0xA026
  
 +/* Subsystem device IDs */
 +#define PCI_SUBSYS_DEVID_88XX_NIC_PF          0xA11E
 +#define PCI_SUBSYS_DEVID_81XX_NIC_PF          0xA21E
 +#define PCI_SUBSYS_DEVID_83XX_NIC_PF          0xA31E
 +
 +#define PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF    0xA11E
 +#define PCI_SUBSYS_DEVID_88XX_NIC_VF          0xA134
 +#define PCI_SUBSYS_DEVID_81XX_NIC_VF          0xA234
 +#define PCI_SUBSYS_DEVID_83XX_NIC_VF          0xA334
 +
  /* PCI BAR nos */
  #define       PCI_CFG_REG_BAR_NUM             0
  #define       PCI_MSIX_REG_BAR_NUM            4
  /* Max pkinds */
  #define       NIC_MAX_PKIND                   16
  
 -/* Rx Channels */
 -/* Receive channel configuration in TNS bypass mode
 - * Below is configuration in TNS bypass mode
 - * BGX0-LMAC0-CHAN0 - VNIC CHAN0
 - * BGX0-LMAC1-CHAN0 - VNIC CHAN16
 - * ...
 - * BGX1-LMAC0-CHAN0 - VNIC CHAN128
 - * ...
 - * BGX1-LMAC3-CHAN0 - VNIC CHAN174
 - */
 -#define       NIC_INTF_COUNT                  2  /* Interfaces btw VNIC and TNS/BGX */
 -#define       NIC_CHANS_PER_INF               128
 -#define       NIC_MAX_CHANS                   (NIC_INTF_COUNT * NIC_CHANS_PER_INF)
 -#define       NIC_CPI_COUNT                   2048 /* No of channel parse indices */
 -
 -/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */
 -#define NIC_MAX_BGX                   MAX_BGX_PER_CN88XX
 -#define       NIC_CPI_PER_BGX                 (NIC_CPI_COUNT / NIC_MAX_BGX)
 -#define       NIC_MAX_CPI_PER_LMAC            64 /* Max when CPI_ALG is IP diffserv */
 -#define       NIC_RSSI_PER_BGX                (NIC_RSSI_COUNT / NIC_MAX_BGX)
 -
 -/* Tx scheduling */
 -#define       NIC_MAX_TL4                     1024
 -#define       NIC_MAX_TL4_SHAPERS             256 /* 1 shaper for 4 TL4s */
 -#define       NIC_MAX_TL3                     256
 -#define       NIC_MAX_TL3_SHAPERS             64  /* 1 shaper for 4 TL3s */
 -#define       NIC_MAX_TL2                     64
 -#define       NIC_MAX_TL2_SHAPERS             2  /* 1 shaper for 32 TL2s */
 -#define       NIC_MAX_TL1                     2
 -
 -/* TNS bypass mode */
 -#define       NIC_TL2_PER_BGX                 32
 -#define       NIC_TL4_PER_BGX                 (NIC_MAX_TL4 / NIC_MAX_BGX)
 -#define       NIC_TL4_PER_LMAC                (NIC_MAX_TL4 / NIC_CHANS_PER_INF)
 +/* Max when CPI_ALG is IP diffserv */
 +#define       NIC_MAX_CPI_PER_LMAC            64
  
  /* NIC VF Interrupts */
  #define       NICVF_INTR_CQ                   0
@@@ -127,6 -148,7 +127,6 @@@ struct nicvf_cq_poll 
        struct  napi_struct napi;
  };
  
 -#define       NIC_RSSI_COUNT                  4096 /* Total no of RSS indices */
  #define NIC_MAX_RSS_HASH_BITS         8
  #define NIC_MAX_RSS_IDR_TBL_SIZE      (1 << NIC_MAX_RSS_HASH_BITS)
  #define RSS_HASH_KEY_SIZE             5 /* 320 bit key */
@@@ -251,13 -273,13 +251,14 @@@ struct nicvf 
        struct net_device       *netdev;
        struct pci_dev          *pdev;
        void __iomem            *reg_base;
 +#define       MAX_QUEUES_PER_QSET                     8
        struct queue_set        *qs;
        struct nicvf_cq_poll    *napi[8];
        u8                      vf_id;
        u8                      sqs_id;
        bool                    sqs_mode;
        bool                    hw_tso;
+       bool                    t88;
  
        /* Receive buffer alloc */
        u32                     rb_page_offset;
  #define       NIC_MBOX_MSG_PNICVF_PTR         0x14    /* Get primary qset nicvf ptr */
   #define       NIC_MBOX_MSG_SNICVF_PTR         0x15    /* Send sqset nicvf ptr to PVF */
  #define       NIC_MBOX_MSG_LOOPBACK           0x16    /* Set interface in loopback */
 +#define       NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17    /* Reset statistics counters */
  #define       NIC_MBOX_MSG_CFG_DONE           0xF0    /* VF configuration done */
  #define       NIC_MBOX_MSG_SHUTDOWN           0xF1    /* VF is being shutdown */
  
@@@ -464,31 -485,6 +465,31 @@@ struct set_loopback 
        bool  enable;
  };
  
 +/* Reset statistics counters */
 +struct reset_stat_cfg {
 +      u8    msg;
 +      /* Bitmap to select NIC_PF_VNIC(vf_id)_RX_STAT(0..13) */
 +      u16   rx_stat_mask;
 +      /* Bitmap to select NIC_PF_VNIC(vf_id)_TX_STAT(0..4) */
 +      u8    tx_stat_mask;
 +      /* Bitmap to select NIC_PF_QS(0..127)_RQ(0..7)_STAT(0..1)
 +       * bit14, bit15 NIC_PF_QS(vf_id)_RQ7_STAT(0..1)
 +       * bit12, bit13 NIC_PF_QS(vf_id)_RQ6_STAT(0..1)
 +       * ..
 +       * bit2, bit3 NIC_PF_QS(vf_id)_RQ1_STAT(0..1)
 +       * bit0, bit1 NIC_PF_QS(vf_id)_RQ0_STAT(0..1)
 +       */
 +      u16   rq_stat_mask;
 +      /* Bitmap to select NIC_PF_QS(0..127)_SQ(0..7)_STAT(0..1)
 +       * bit14, bit15 NIC_PF_QS(vf_id)_SQ7_STAT(0..1)
 +       * bit12, bit13 NIC_PF_QS(vf_id)_SQ6_STAT(0..1)
 +       * ..
 +       * bit2, bit3 NIC_PF_QS(vf_id)_SQ1_STAT(0..1)
 +       * bit0, bit1 NIC_PF_QS(vf_id)_SQ0_STAT(0..1)
 +       */
 +      u16   sq_stat_mask;
 +};
 +
  /* 128 bit shared memory between PF and each VF */
  union nic_mbx {
        struct { u8 msg; }      msg;
        struct sqs_alloc        sqs_alloc;
        struct nicvf_ptr        nicvf;
        struct set_loopback     lbk;
 +      struct reset_stat_cfg   reset_stat;
  };
  
  #define NIC_NODE_ID_MASK      0x03
@@@ -520,14 -515,7 +521,14 @@@ static inline int nic_get_node_id(struc
  
  static inline bool pass1_silicon(struct pci_dev *pdev)
  {
 -      return pdev->revision < 8;
 +      return (pdev->revision < 8) &&
 +              (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
 +}
 +
 +static inline bool pass2_silicon(struct pci_dev *pdev)
 +{
 +      return (pdev->revision >= 8) &&
 +              (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
  }
  
  int nicvf_set_real_num_queues(struct net_device *netdev,
  #define DRV_NAME      "thunder-nic"
  #define DRV_VERSION   "1.0"
  
 +struct hw_info {
 +      u8              bgx_cnt;
 +      u8              chans_per_lmac;
 +      u8              chans_per_bgx; /* Rx/Tx chans */
 +      u8              chans_per_rgx;
 +      u8              chans_per_lbk;
 +      u16             cpi_cnt;
 +      u16             rssi_cnt;
 +      u16             rss_ind_tbl_size;
 +      u16             tl4_cnt;
 +      u16             tl3_cnt;
 +      u8              tl2_cnt;
 +      u8              tl1_cnt;
 +      bool            tl1_per_bgx; /* TL1 per BGX or per LMAC */
 +};
 +
  struct nicpf {
        struct pci_dev          *pdev;
 +      struct hw_info          *hw;
        u8                      node;
        unsigned int            flags;
        u8                      num_vf_en;      /* No of VF enabled */
  #define       NIC_SET_VF_LMAC_MAP(bgx, lmac)  (((bgx & 0xF) << 4) | (lmac & 0xF))
  #define       NIC_GET_BGX_FROM_VF_LMAC_MAP(map)       ((map >> 4) & 0xF)
  #define       NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)      (map & 0xF)
 -      u8                      vf_lmac_map[MAX_LMAC];
 +      u8                      *vf_lmac_map;
        struct delayed_work     dwork;
        struct workqueue_struct *check_link;
 -      u8                      link[MAX_LMAC];
 -      u8                      duplex[MAX_LMAC];
 -      u32                     speed[MAX_LMAC];
 +      u8                      *link;
 +      u8                      *duplex;
 +      u32                     *speed;
        u16                     cpi_base[MAX_NUM_VFS_SUPPORTED];
        u16                     rssi_base[MAX_NUM_VFS_SUPPORTED];
 -      u16                     rss_ind_tbl_size;
        bool                    mbx_lock[MAX_NUM_VFS_SUPPORTED];
  
        /* MSI-X */
        bool                    msix_enabled;
        u8                      num_vec;
 -      struct msix_entry       msix_entries[NIC_PF_MSIX_VECTORS];
 +      struct msix_entry       *msix_entries;
        bool                    irq_allocated[NIC_PF_MSIX_VECTORS];
 +      char                    irq_name[NIC_PF_MSIX_VECTORS][20];
  };
  
  /* Supported devices */
@@@ -106,22 -89,9 +106,22 @@@ static u64 nic_reg_read(struct nicpf *n
  /* PF -> VF mailbox communication APIs */
  static void nic_enable_mbx_intr(struct nicpf *nic)
  {
 -      /* Enable mailbox interrupt for all 128 VFs */
 -      nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull);
 -      nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull);
 +      int vf_cnt = pci_sriov_get_totalvfs(nic->pdev);
 +
 +#define INTR_MASK(vfs) ((vfs < 64) ? (BIT_ULL(vfs) - 1) : (~0ull))
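 +/* e.g. INTR_MASK(8) == 0xff; 64 or more VFs yield the full 64-bit mask */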
 +
 +      /* Clear it, to avoid spurious interrupts (if any) */
 +      nic_reg_write(nic, NIC_PF_MAILBOX_INT, INTR_MASK(vf_cnt));
 +
 +      /* Enable mailbox interrupt for all VFs */
 +      nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, INTR_MASK(vf_cnt));
 +      /* One mailbox intr enable reg per 64 VFs */
 +      if (vf_cnt > 64) {
 +              nic_reg_write(nic, NIC_PF_MAILBOX_INT + sizeof(u64),
 +                            INTR_MASK(vf_cnt - 64));
 +              nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64),
 +                            INTR_MASK(vf_cnt - 64));
 +      }
  }
  
  static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg)
@@@ -174,7 -144,7 +174,7 @@@ static void nic_mbx_send_ready(struct n
  
        mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;
  
 -      if (vf < MAX_LMAC) {
 +      if (vf < nic->num_vf_en) {
                bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
                lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
  
        mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false;
        mbx.nic_cfg.node_id = nic->node;
  
 -      mbx.nic_cfg.loopback_supported = vf < MAX_LMAC;
 +      mbx.nic_cfg.loopback_supported = vf < nic->num_vf_en;
  
        nic_send_msg_to_vf(nic, vf, &mbx);
  }
@@@ -278,22 -248,19 +278,27 @@@ static int nic_update_hw_frs(struct nic
  /* Set minimum transmit packet size */
  static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
  {
 -      int lmac;
 +      int lmac, max_lmac;
 +      u16 sdevid;
        u64 lmac_cfg;
  
-       /* Max value that can be set is 60 */
-       if (size > 60)
-               size = 60;
+       /* There is an issue in HW wherein, while sending GSO sized
+        * pkts as part of TSO, if the pkt length falls below this size
+        * the NIC will zero-pad the packet and also update the IP total
+        * length. Hence set this value to less than the minimum pkt size
+        * of MAC+IP+TCP headers; BGX will do the padding to transmit a
+        * 64 byte pkt.
+        */
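+       /* 52 is below the 54-byte minimum of MAC (14) + IPv4 (20) + TCP (20) headers */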
+       if (size > 52)
+               size = 52;
  
 -      for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) {
 +      pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
 +      /* 81xx's RGX has only one LMAC */
 +      if (sdevid == PCI_SUBSYS_DEVID_81XX_NIC_PF)
 +              max_lmac = ((nic->hw->bgx_cnt - 1) * MAX_LMAC_PER_BGX) + 1;
 +      else
 +              max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX;
 +
 +      for (lmac = 0; lmac < max_lmac; lmac++) {
                lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3));
                lmac_cfg &= ~(0xF << 2);
                lmac_cfg |= ((size / 4) << 2);
@@@ -313,7 -280,7 +318,7 @@@ static void nic_set_lmac_vf_mapping(str
  
        nic->num_vf_en = 0;
  
 -      for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) {
 +      for (bgx = 0; bgx < nic->hw->bgx_cnt; bgx++) {
                if (!(bgx_map & (1 << bgx)))
                        continue;
                lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
                        nic_reg_write(nic,
                                      NIC_PF_LMAC_0_7_CREDIT + (lmac * 8),
                                      lmac_credit);
 +
 +              /* On CN81XX there are only 8 VFs but the maximum possible
 +               * number of interfaces is 9.
 +               */
 +              if (nic->num_vf_en >= pci_sriov_get_totalvfs(nic->pdev)) {
 +                      nic->num_vf_en = pci_sriov_get_totalvfs(nic->pdev);
 +                      break;
 +              }
        }
  }
  
 +static void nic_free_lmacmem(struct nicpf *nic)
 +{
 +      kfree(nic->vf_lmac_map);
 +      kfree(nic->link);
 +      kfree(nic->duplex);
 +      kfree(nic->speed);
 +}
 +
 +static int nic_get_hw_info(struct nicpf *nic)
 +{
 +      u8 max_lmac;
 +      u16 sdevid;
 +      struct hw_info *hw = nic->hw;
 +
 +      pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
 +
 +      switch (sdevid) {
 +      case PCI_SUBSYS_DEVID_88XX_NIC_PF:
 +              hw->bgx_cnt = MAX_BGX_PER_CN88XX;
 +              hw->chans_per_lmac = 16;
 +              hw->chans_per_bgx = 128;
 +              hw->cpi_cnt = 2048;
 +              hw->rssi_cnt = 4096;
 +              hw->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
 +              hw->tl3_cnt = 256;
 +              hw->tl2_cnt = 64;
 +              hw->tl1_cnt = 2;
 +              hw->tl1_per_bgx = true;
 +              break;
 +      case PCI_SUBSYS_DEVID_81XX_NIC_PF:
 +              hw->bgx_cnt = MAX_BGX_PER_CN81XX;
 +              hw->chans_per_lmac = 8;
 +              hw->chans_per_bgx = 32;
 +              hw->chans_per_rgx = 8;
 +              hw->chans_per_lbk = 24;
 +              hw->cpi_cnt = 512;
 +              hw->rssi_cnt = 256;
 +              hw->rss_ind_tbl_size = 32; /* Max RSSI / Max interfaces */
 +              hw->tl3_cnt = 64;
 +              hw->tl2_cnt = 16;
 +              hw->tl1_cnt = 10;
 +              hw->tl1_per_bgx = false;
 +              break;
 +      case PCI_SUBSYS_DEVID_83XX_NIC_PF:
 +              hw->bgx_cnt = MAX_BGX_PER_CN83XX;
 +              hw->chans_per_lmac = 8;
 +              hw->chans_per_bgx = 32;
 +              hw->chans_per_lbk = 64;
 +              hw->cpi_cnt = 2048;
 +              hw->rssi_cnt = 1024;
 +              hw->rss_ind_tbl_size = 64; /* Max RSSI / Max interfaces */
 +              hw->tl3_cnt = 256;
 +              hw->tl2_cnt = 64;
 +              hw->tl1_cnt = 18;
 +              hw->tl1_per_bgx = false;
 +              break;
 +      }
 +      hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev);
 +
 +      /* Allocate memory for LMAC tracking elements */
 +      max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX;
 +      nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
 +      if (!nic->vf_lmac_map)
 +              goto error;
 +      nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
 +      if (!nic->link)
 +              goto error;
 +      nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
 +      if (!nic->duplex)
 +              goto error;
 +      nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL);
 +      if (!nic->speed)
 +              goto error;
 +      return 0;
 +
 +error:
 +      nic_free_lmacmem(nic);
 +      return -ENOMEM;
 +}
 +
  #define BGX0_BLOCK 8
  #define BGX1_BLOCK 9
  
 -static void nic_init_hw(struct nicpf *nic)
 +static int nic_init_hw(struct nicpf *nic)
  {
 -      int i;
 +      int i, err;
        u64 cqm_cfg;
  
 +      /* Get HW capability info */
 +      err = nic_get_hw_info(nic);
 +      if (err)
 +              return err;
 +
        /* Enable NIC HW block */
        nic_reg_write(nic, NIC_PF_CFG, 0x3);
  
        /* Enable backpressure */
        nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03);
  
 -      /* Disable TNS mode on both interfaces */
 -      nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
 -                    (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
 -      nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
 -                    (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
 +      /* TNS and TNS bypass modes are present only on 88xx */
 +      if (nic->pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF) {
 +              /* Disable TNS mode on both interfaces */
 +              nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
 +                            (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
 +              nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
 +                            (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
 +      }
 +
        nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
                      (1ULL << 63) | BGX0_BLOCK);
        nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
        cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
        if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
                nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
 +
 +      return 0;
  }
  
  /* Channel parse index configuration */
  static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
  {
 +      struct hw_info *hw = nic->hw;
        u32 vnic, bgx, lmac, chan;
        u32 padd, cpi_count = 0;
        u64 cpi_base, cpi, rssi_base, rssi;
        bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
        lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
  
 -      chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
 -      cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX);
 -      rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX);
 +      chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
 +      cpi_base = vnic * NIC_MAX_CPI_PER_LMAC;
 +      rssi_base = vnic * hw->rss_ind_tbl_size;
  
        /* Rx channel configuration */
        nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3),
@@@ -572,7 -439,7 +577,7 @@@ static void nic_send_rss_size(struct ni
        msg = (u64 *)&mbx;
  
        mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
 -      mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size;
 +      mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size;
        nic_send_msg_to_vf(nic, vf, &mbx);
  }
  
@@@ -619,7 -486,7 +624,7 @@@ static void nic_config_rss(struct nicp
   /* 4 level transmit side scheduler configuration
   * for TNS bypass mode
   *
 - * Sample configuration for SQ0
 + * Sample configuration for SQ0 on 88xx
   * VNIC0-SQ0 -> TL4(0)   -> TL3[0]   -> TL2[0]  -> TL1[0] -> BGX0
   * VNIC1-SQ0 -> TL4(8)   -> TL3[2]   -> TL2[0]  -> TL1[0] -> BGX0
   * VNIC2-SQ0 -> TL4(16)  -> TL3[4]   -> TL2[1]  -> TL1[0] -> BGX0
  static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
                               struct sq_cfg_msg *sq)
  {
 +      struct hw_info *hw = nic->hw;
        u32 bgx, lmac, chan;
        u32 tl2, tl3, tl4;
        u32 rr_quantum;
        /* 24 bytes for FCS, IPG and preamble */
        rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
  
 -      if (!sq->sqs_mode) {
 -              tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
 -      } else {
 -              for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
 -                      if (nic->vf_sqs[pqs_vnic][svf] == vnic)
 -                              break;
 +      /* For 88xx, TL4s 0-511 transmit via BGX0 and
 +       * TL4s 512-1023 transmit via BGX1.
 +       */
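 +      /* e.g. VNIC1-SQ0 (bgx 0, lmac 1): tl4 = 0 * 512 + 1 * 8 + 0 = 8, matching
 +       * TL4(8) in the sample mapping above (assuming 128 total VFs on 88xx,
 +       * i.e. 1024 TL4s split across the two BGX interfaces).
 +       */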
 +      if (hw->tl1_per_bgx) {
 +              tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt);
 +              if (!sq->sqs_mode) {
 +                      tl4 += (lmac * MAX_QUEUES_PER_QSET);
 +              } else {
 +                      for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
 +                              if (nic->vf_sqs[pqs_vnic][svf] == vnic)
 +                                      break;
 +                      }
 +                      tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET);
 +                      tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF);
 +                      tl4 += (svf * MAX_QUEUES_PER_QSET);
                }
 -              tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC);
 -              tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF);
 -              tl4 += (svf * NIC_TL4_PER_LMAC);
 -              tl4 += (bgx * NIC_TL4_PER_BGX);
 +      } else {
 +              tl4 = (vnic * MAX_QUEUES_PER_QSET);
        }
        tl4 += sq_idx;
  
 -      tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
 +      tl3 = tl4 / (hw->tl4_cnt / hw->tl3_cnt);
        nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
                      ((u64)vnic << NIC_QS_ID_SHIFT) |
                      ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4);
                      ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum);
  
        nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum);
 -      chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
 -      nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
 +
 +      /* On 88xx channels 0-127 are for BGX0 and
 +       * channels 128-255 are for BGX1.
 +       *
 +       * On 81xx/83xx the TL3_CHAN reg should be configured with the channel
 +       * within the LMAC, i.e. 0-7, and not the actual channel number like on 88xx.
 +       */
 +      chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
 +      if (hw->tl1_per_bgx)
 +              nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
 +      else
 +              nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), 0);
 +
        /* Enable backpressure on the channel */
        nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1);
  
        nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum);
        /* No priorities as of now */
        nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00);
 +
 +      /* Unlike 88xx, where TL2s 0-31 transmit to TL1 '0' and the rest to TL1 '1',
 +       * on 81xx/83xx each TL2 needs to be configured to transmit to one of the
 +       * possible LMACs.
 +       *
 +       * This register doesn't exist on 88xx.
 +       */
 +      if (!hw->tl1_per_bgx)
 +              nic_reg_write(nic, NIC_PF_TL2_LMAC | (tl2 << 3),
 +                            lmac + (bgx * MAX_LMAC_PER_BGX));
  }
  
  /* Send primary nicvf pointer to secondary QS's VF */
@@@ -782,7 -620,7 +787,7 @@@ static int nic_config_loopback(struct n
  {
        int bgx_idx, lmac_idx;
  
 -      if (lbk->vf_id > MAX_LMAC)
 +      if (lbk->vf_id >= nic->num_vf_en)
                return -1;
  
        bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
        return 0;
  }
  
 +/* Reset statistics counters */
 +static int nic_reset_stat_counters(struct nicpf *nic,
 +                                 int vf, struct reset_stat_cfg *cfg)
 +{
 +      int i, stat, qnum;
 +      u64 reg_addr;
 +
 +      for (i = 0; i < RX_STATS_ENUM_LAST; i++) {
 +              if (cfg->rx_stat_mask & BIT(i)) {
 +                      reg_addr = NIC_PF_VNIC_0_127_RX_STAT_0_13 |
 +                                 (vf << NIC_QS_ID_SHIFT) |
 +                                 (i << 3);
 +                      nic_reg_write(nic, reg_addr, 0);
 +              }
 +      }
 +
 +      for (i = 0; i < TX_STATS_ENUM_LAST; i++) {
 +              if (cfg->tx_stat_mask & BIT(i)) {
 +                      reg_addr = NIC_PF_VNIC_0_127_TX_STAT_0_4 |
 +                                 (vf << NIC_QS_ID_SHIFT) |
 +                                 (i << 3);
 +                      nic_reg_write(nic, reg_addr, 0);
 +              }
 +      }
 +
 +      for (i = 0; i <= 15; i++) {
 +              qnum = i >> 1;
 +              stat = i & 1 ? 1 : 0;
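 +              /* bit i selects queue i/2, stat i%2, e.g. bit 5 -> RQ2/SQ2 STAT1 */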
 +              reg_addr = (vf << NIC_QS_ID_SHIFT) |
 +                         (qnum << NIC_Q_NUM_SHIFT) | (stat << 3);
 +              if (cfg->rq_stat_mask & BIT(i)) {
 +                      reg_addr |= NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1;
 +                      nic_reg_write(nic, reg_addr, 0);
 +              }
 +              if (cfg->sq_stat_mask & BIT(i)) {
 +                      reg_addr |= NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1;
 +                      nic_reg_write(nic, reg_addr, 0);
 +              }
 +      }
 +      return 0;
 +}
 +
 +static void nic_enable_tunnel_parsing(struct nicpf *nic, int vf)
 +{
 +      u64 prot_def = (IPV6_PROT << 32) | (IPV4_PROT << 16) | ET_PROT;
 +      u64 vxlan_prot_def = (IPV6_PROT_DEF << 32) |
 +                            (IPV4_PROT_DEF) << 16 | ET_PROT_DEF;
 +
 +      /* Configure tunnel parsing parameters */
 +      nic_reg_write(nic, NIC_PF_RX_GENEVE_DEF,
 +                    (1ULL << 63 | UDP_GENEVE_PORT_NUM));
 +      nic_reg_write(nic, NIC_PF_RX_GENEVE_PROT_DEF,
 +                    ((7ULL << 61) | prot_def));
 +      nic_reg_write(nic, NIC_PF_RX_NVGRE_PROT_DEF,
 +                    ((7ULL << 61) | prot_def));
 +      nic_reg_write(nic, NIC_PF_RX_VXLAN_DEF_0_1,
 +                    ((1ULL << 63) | UDP_VXLAN_PORT_NUM));
 +      nic_reg_write(nic, NIC_PF_RX_VXLAN_PROT_DEF,
 +                    ((0xfULL << 60) | vxlan_prot_def));
 +}
 +
  static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
  {
        int bgx, lmac;
@@@ -892,17 -669,18 +897,17 @@@ static void nic_handle_mbx_intr(struct 
                mbx_addr += sizeof(u64);
        }
  
 -      dev_dbg(&nic->pdev->dev, "%s: Mailbox msg %d from VF%d\n",
 +      dev_dbg(&nic->pdev->dev, "%s: Mailbox msg 0x%02x from VF%d\n",
                __func__, mbx.msg.msg, vf);
        switch (mbx.msg.msg) {
        case NIC_MBOX_MSG_READY:
                nic_mbx_send_ready(nic, vf);
 -              if (vf < MAX_LMAC) {
 +              if (vf < nic->num_vf_en) {
                        nic->link[vf] = 0;
                        nic->duplex[vf] = 0;
                        nic->speed[vf] = 0;
                }
 -              ret = 1;
 -              break;
 +              goto unlock;
        case NIC_MBOX_MSG_QS_CFG:
                reg_addr = NIC_PF_QSET_0_127_CFG |
                           (mbx.qs.num << NIC_QS_ID_SHIFT);
                           (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
                           (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
                nic_reg_write(nic, reg_addr, mbx.rq.cfg);
 +              /* Enable CQE_RX2_S extension in CQE_RX descriptor.
 +               * It is appended by default on 81xx/83xx chips; for
 +               * consistency, enable the same on 88xx pass2, where
 +               * this extension was introduced.
 +               */
 +              if (pass2_silicon(nic->pdev))
 +                      nic_reg_write(nic, NIC_PF_RX_CFG, 0x01);
 +              if (!pass1_silicon(nic->pdev))
 +                      nic_enable_tunnel_parsing(nic, vf);
                break;
        case NIC_MBOX_MSG_RQ_BP_CFG:
                reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG |
                nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq);
                break;
        case NIC_MBOX_MSG_SET_MAC:
 -              if (vf >= nic->num_vf_en)
 +              if (vf >= nic->num_vf_en) {
 +                      ret = -1; /* NACK */
                        break;
 +              }
                lmac = mbx.mac.vf_id;
                bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
                lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
        case NIC_MBOX_MSG_LOOPBACK:
                ret = nic_config_loopback(nic, &mbx.lbk);
                break;
 +      case NIC_MBOX_MSG_RESET_STAT_COUNTER:
 +              ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat);
 +              break;
        default:
                dev_err(&nic->pdev->dev,
                        "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
                break;
        }
  
 -      if (!ret)
 +      if (!ret) {
                nic_mbx_send_ack(nic, vf);
 -      else if (mbx.msg.msg != NIC_MBOX_MSG_READY)
 +      } else if (mbx.msg.msg != NIC_MBOX_MSG_READY) {
 +              dev_err(&nic->pdev->dev, "NACK for MBOX 0x%02x from VF %d\n",
 +                      mbx.msg.msg, vf);
                nic_mbx_send_nack(nic, vf);
 +      }
  unlock:
        nic->mbx_lock[vf] = false;
  }
  
 -static void nic_mbx_intr_handler (struct nicpf *nic, int mbx)
 +static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
  {
 +      struct nicpf *nic = (struct nicpf *)nic_irq;
 +      int mbx;
        u64 intr;
        u8  vf, vf_per_mbx_reg = 64;
  
 +      if (irq == nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector)
 +              mbx = 0;
 +      else
 +              mbx = 1;
 +
        intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
        dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
        for (vf = 0; vf < vf_per_mbx_reg; vf++) {
                        nic_clear_mbx_intr(nic, vf, mbx);
                }
        }
 -}
 -
 -static irqreturn_t nic_mbx0_intr_handler (int irq, void *nic_irq)
 -{
 -      struct nicpf *nic = (struct nicpf *)nic_irq;
 -
 -      nic_mbx_intr_handler(nic, 0);
 -
 -      return IRQ_HANDLED;
 -}
 -
 -static irqreturn_t nic_mbx1_intr_handler (int irq, void *nic_irq)
 -{
 -      struct nicpf *nic = (struct nicpf *)nic_irq;
 -
 -      nic_mbx_intr_handler(nic, 1);
 -
        return IRQ_HANDLED;
  }
  
@@@ -1055,13 -826,7 +1060,13 @@@ static int nic_enable_msix(struct nicp
  {
        int i, ret;
  
 -      nic->num_vec = NIC_PF_MSIX_VECTORS;
 +      nic->num_vec = pci_msix_vec_count(nic->pdev);
 +
 +      nic->msix_entries = kmalloc_array(nic->num_vec,
 +                                        sizeof(struct msix_entry),
 +                                        GFP_KERNEL);
 +      if (!nic->msix_entries)
 +              return -ENOMEM;
  
        for (i = 0; i < nic->num_vec; i++)
                nic->msix_entries[i].entry = i;
        ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec);
        if (ret) {
                dev_err(&nic->pdev->dev,
 -                      "Request for #%d msix vectors failed\n",
 -                         nic->num_vec);
 +                      "Request for #%d msix vectors failed, returned %d\n",
 +                         nic->num_vec, ret);
 +              kfree(nic->msix_entries);
                return ret;
        }
  
@@@ -1083,7 -847,6 +1088,7 @@@ static void nic_disable_msix(struct nic
  {
        if (nic->msix_enabled) {
                pci_disable_msix(nic->pdev);
 +              kfree(nic->msix_entries);
                nic->msix_enabled = 0;
                nic->num_vec = 0;
        }
@@@ -1102,26 -865,27 +1107,26 @@@ static void nic_free_all_interrupts(str
  
  static int nic_register_interrupts(struct nicpf *nic)
  {
 -      int ret;
 +      int i, ret;
  
        /* Enable MSI-X */
        ret = nic_enable_msix(nic);
        if (ret)
                return ret;
  
 -      /* Register mailbox interrupt handlers */
 -      ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector,
 -                        nic_mbx0_intr_handler, 0, "NIC Mbox0", nic);
 -      if (ret)
 -              goto fail;
 -
 -      nic->irq_allocated[NIC_PF_INTR_ID_MBOX0] = true;
 +      /* Register mailbox interrupt handler */
 +      for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) {
 +              sprintf(nic->irq_name[i],
 +                      "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0));
  
 -      ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX1].vector,
 -                        nic_mbx1_intr_handler, 0, "NIC Mbox1", nic);
 -      if (ret)
 -              goto fail;
 +              ret = request_irq(nic->msix_entries[i].vector,
 +                                nic_mbx_intr_handler, 0,
 +                                nic->irq_name[i], nic);
 +              if (ret)
 +                      goto fail;
  
 -      nic->irq_allocated[NIC_PF_INTR_ID_MBOX1] = true;
 +              nic->irq_allocated[i] = true;
 +      }
  
        /* Enable mailbox interrupt */
        nic_enable_mbx_intr(nic);
  fail:
        dev_err(&nic->pdev->dev, "Request irq failed\n");
        nic_free_all_interrupts(nic);
 +      nic_disable_msix(nic);
        return ret;
  }
  
@@@ -1145,12 -908,6 +1150,12 @@@ static int nic_num_sqs_en(struct nicpf 
        int pos, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;
        u16 total_vf;
  
 +      /* Secondary Qsets are needed only if the CPU count is
 +       * more than MAX_QUEUES_PER_QSET.
 +       */
 +      if (num_online_cpus() <= MAX_QUEUES_PER_QSET)
 +              return 0;
 +
        /* Check if its a multi-node environment */
        if (nr_node_ids > 1)
                sqs_per_vf = MAX_SQS_PER_VF;
@@@ -1256,12 -1013,6 +1261,12 @@@ static int nic_probe(struct pci_dev *pd
        if (!nic)
                return -ENOMEM;
  
 +      nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL);
 +      if (!nic->hw) {
 +              devm_kfree(dev, nic);
 +              return -ENOMEM;
 +      }
 +
        pci_set_drvdata(pdev, nic);
  
        nic->pdev = pdev;
  
        nic->node = nic_get_node_id(pdev);
  
 -      nic_set_lmac_vf_mapping(nic);
 -
        /* Initialize hardware */
 -      nic_init_hw(nic);
 +      err = nic_init_hw(nic);
 +      if (err)
 +              goto err_release_regions;
  
 -      /* Set RSS TBL size for each VF */
 -      nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
 +      nic_set_lmac_vf_mapping(nic);
  
        /* Register interrupts */
        err = nic_register_interrupts(nic);
@@@ -1339,9 -1091,6 +1344,9 @@@ err_unregister_interrupts
  err_release_regions:
        pci_release_regions(pdev);
  err_disable_device:
 +      nic_free_lmacmem(nic);
 +      devm_kfree(dev, nic->hw);
 +      devm_kfree(dev, nic);
        pci_disable_device(pdev);
        pci_set_drvdata(pdev, NULL);
        return err;
@@@ -1362,11 -1111,6 +1367,11 @@@ static void nic_remove(struct pci_dev *
  
        nic_unregister_interrupts(nic);
        pci_release_regions(pdev);
 +
 +      nic_free_lmacmem(nic);
 +      devm_kfree(&pdev->dev, nic->hw);
 +      devm_kfree(&pdev->dev, nic);
 +
        pci_disable_device(pdev);
        pci_set_drvdata(pdev, NULL);
  }
  static const struct pci_device_id nicvf_id_table[] = {
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
                         PCI_DEVICE_ID_THUNDER_NIC_VF,
 -                       PCI_VENDOR_ID_CAVIUM, 0xA134) },
 +                       PCI_VENDOR_ID_CAVIUM,
 +                       PCI_SUBSYS_DEVID_88XX_NIC_VF) },
        { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
                         PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
 -                       PCI_VENDOR_ID_CAVIUM, 0xA11E) },
 +                       PCI_VENDOR_ID_CAVIUM,
 +                       PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
 +      { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
 +                       PCI_DEVICE_ID_THUNDER_NIC_VF,
 +                       PCI_VENDOR_ID_CAVIUM,
 +                       PCI_SUBSYS_DEVID_81XX_NIC_VF) },
 +      { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
 +                       PCI_DEVICE_ID_THUNDER_NIC_VF,
 +                       PCI_VENDOR_ID_CAVIUM,
 +                       PCI_SUBSYS_DEVID_83XX_NIC_VF) },
        { 0, }  /* end of table */
  };
  
@@@ -144,19 -134,15 +144,19 @@@ int nicvf_send_msg_to_pf(struct nicvf *
  
        /* Wait for previous message to be acked, timeout 2sec */
        while (!nic->pf_acked) {
 -              if (nic->pf_nacked)
 +              if (nic->pf_nacked) {
 +                      netdev_err(nic->netdev,
 +                                 "PF NACK to mbox msg 0x%02x from VF%d\n",
 +                                 (mbx->msg.msg & 0xFF), nic->vf_id);
                        return -EINVAL;
 +              }
                msleep(sleep);
                if (nic->pf_acked)
                        break;
                timeout -= sleep;
                if (!timeout) {
                        netdev_err(nic->netdev,
 -                                 "PF didn't ack to mbox msg %d from VF%d\n",
 +                                 "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
                                   (mbx->msg.msg & 0xFF), nic->vf_id);
                        return -EBUSY;
                }
@@@ -366,7 -352,13 +366,7 @@@ static int nicvf_rss_init(struct nicvf 
  
        rss->enable = true;
  
 -      /* Using the HW reset value for now */
 -      rss->key[0] = 0xFEED0BADFEED0BADULL;
 -      rss->key[1] = 0xFEED0BADFEED0BADULL;
 -      rss->key[2] = 0xFEED0BADFEED0BADULL;
 -      rss->key[3] = 0xFEED0BADFEED0BADULL;
 -      rss->key[4] = 0xFEED0BADFEED0BADULL;
 -
 +      netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
        nicvf_set_rss_key(nic);
  
        rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
@@@ -515,13 -507,13 +515,14 @@@ static int nicvf_init_resources(struct 
  
  static void nicvf_snd_pkt_handler(struct net_device *netdev,
                                  struct cmp_queue *cq,
 -                                struct cqe_send_t *cqe_tx, int cqe_type)
 +                                struct cqe_send_t *cqe_tx,
 +                                int cqe_type, int budget)
  {
        struct sk_buff *skb = NULL;
        struct nicvf *nic = netdev_priv(netdev);
        struct snd_queue *sq;
        struct sq_hdr_subdesc *hdr;
+       struct sq_hdr_subdesc *tso_sqe;
  
        sq = &nic->qs->sq[cqe_tx->sq_idx];
  
  
        nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
        skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
-       /* For TSO offloaded packets only one SQE will have a valid SKB */
        if (skb) {
+               /* Check for dummy descriptor used for HW TSO offload on 88xx */
+               if (hdr->dont_send) {
+                       /* Get actual TSO descriptors and free them */
+                       tso_sqe =
+                        (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+                       nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1);
+               }
                nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
                prefetch(skb);
 -              dev_consume_skb_any(skb);
 +              napi_consume_skb(skb, budget);
                sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
        } else {
-               /* In case of HW TSO, HW sends a CQE for each segment of a TSO
-                * packet instead of a single CQE for the whole TSO packet
-                * transmitted. Each of this CQE points to the same SQE, so
-                * avoid freeing same SQE multiple times.
+               /* In case of SW TSO on 88xx, only the last segment will have
+                * an SKB attached, so just free the SQEs here.
                 */
                if (!nic->hw_tso)
                        nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
@@@ -695,8 -691,7 +700,8 @@@ loop
                break;
                case CQE_TYPE_SEND:
                        nicvf_snd_pkt_handler(netdev, cq,
 -                                            (void *)cq_desc, CQE_TYPE_SEND);
 +                                            (void *)cq_desc, CQE_TYPE_SEND,
 +                                            budget);
                        tx_done++;
                break;
                case CQE_TYPE_INVALID:
@@@ -938,19 -933,16 +943,19 @@@ static int nicvf_register_interrupts(st
        int vector;
  
        for_each_cq_irq(irq)
 -              sprintf(nic->irq_name[irq], "NICVF%d CQ%d",
 -                      nic->vf_id, irq);
 +              sprintf(nic->irq_name[irq], "%s-rxtx-%d",
 +                      nic->pnicvf->netdev->name,
 +                      nicvf_netdev_qidx(nic, irq));
  
        for_each_sq_irq(irq)
 -              sprintf(nic->irq_name[irq], "NICVF%d SQ%d",
 -                      nic->vf_id, irq - NICVF_INTR_ID_SQ);
 +              sprintf(nic->irq_name[irq], "%s-sq-%d",
 +                      nic->pnicvf->netdev->name,
 +                      nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ));
  
        for_each_rbdr_irq(irq)
 -              sprintf(nic->irq_name[irq], "NICVF%d RBDR%d",
 -                      nic->vf_id, irq - NICVF_INTR_ID_RBDR);
 +              sprintf(nic->irq_name[irq], "%s-rbdr-%d",
 +                      nic->pnicvf->netdev->name,
 +                      nic->sqs_mode ? (nic->sqs_id + 1) : 0);
  
        /* Register CQ interrupts */
        for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
        }
  
        /* Register QS error interrupt */
 -      sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR],
 -              "NICVF%d Qset error", nic->vf_id);
 +      sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d",
 +              nic->pnicvf->netdev->name,
 +              nic->sqs_mode ? (nic->sqs_id + 1) : 0);
        irq = NICVF_INTR_ID_QS_ERR;
        ret = request_irq(nic->msix_entries[irq].vector,
                          nicvf_qs_err_intr_handler,
@@@ -1205,7 -1196,7 +1210,7 @@@ int nicvf_open(struct net_device *netde
        }
  
        /* Check if we got MAC address from PF or else generate a random MAC */
 -      if (is_zero_ether_addr(netdev->dev_addr)) {
 +      if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
                eth_hw_addr_random(netdev);
                nicvf_hw_set_mac_addr(nic, netdev);
        }
@@@ -1516,6 -1507,7 +1521,7 @@@ static int nicvf_probe(struct pci_dev *
        struct net_device *netdev;
        struct nicvf *nic;
        int    err, qcount;
+       u16    sdevid;
  
        err = pci_enable_device(pdev);
        if (err) {
                goto err_release_regions;
        }
  
 -      qcount = MAX_CMP_QUEUES_PER_QS;
 +      qcount = netif_get_num_default_rss_queues();
  
        /* Restrict multiqset support only for host bound VFs */
        if (pdev->is_virtfn) {
                /* Set max number of queues per VF */
 -              qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS);
 -              qcount = min(qcount,
 -                           (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
 +              qcount = min_t(int, num_online_cpus(),
 +                             (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
        }
  
        netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
        if (!pass1_silicon(nic->pdev))
                nic->hw_tso = true;
  
+       pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+       if (sdevid == PCI_SUBSYS_DEVID_88XX_NIC_VF)
+               nic->t88 = true;
        /* Check if this VF is in QS only mode */
        if (nic->sqs_mode)
                return 0;
@@@ -479,16 -479,6 +479,16 @@@ void nicvf_config_vlan_stripping(struc
                                              NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
  }
  
 +static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
 +{
 +      union nic_mbx mbx = {};
 +
 +      /* Reset all RXQ's stats */
 +      mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
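 +      /* 0xFFFF selects STAT0 and STAT1 of all 8 RQs (bits 0-15) */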
 +      mbx.reset_stat.rq_stat_mask = 0xFFFF;
 +      nicvf_send_msg_to_pf(nic, &mbx);
 +}
 +
  /* Configures receive queue */
  static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
                                   int qidx, bool enable)
@@@ -772,10 -762,10 +772,10 @@@ int nicvf_set_qset_resources(struct nic
        nic->qs = qs;
  
        /* Set count of each queue */
 -      qs->rbdr_cnt = RBDR_CNT;
 -      qs->rq_cnt = RCV_QUEUE_CNT;
 -      qs->sq_cnt = SND_QUEUE_CNT;
 -      qs->cq_cnt = CMP_QUEUE_CNT;
 +      qs->rbdr_cnt = DEFAULT_RBDR_CNT;
 +      qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
 +      qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
 +      qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);
  
        /* Set queue lengths */
        qs->rbdr_len = RCV_BUF_COUNT;
@@@ -822,11 -812,6 +822,11 @@@ int nicvf_config_data_transfer(struct n
                nicvf_free_resources(nic);
        }
  
 +      /* Reset RXQ's stats.
 +       * SQ's stats will get reset automatically once SQ is reset.
 +       */
 +      nicvf_reset_rcv_queue_stats(nic);
 +
        return 0;
  }
  
@@@ -953,6 -938,8 +953,8 @@@ static int nicvf_tso_count_subdescs(str
        return num_edescs + sh->gso_segs;
  }
  
+ #define POST_CQE_DESC_COUNT 2
  /* Get the number of SQ descriptors needed to xmit this skb */
  static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
  {
                return subdesc_cnt;
        }
  
+       /* Dummy descriptors to get TSO pkt completion notification */
+       if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size)
+               subdesc_cnt += POST_CQE_DESC_COUNT;
        if (skb_shinfo(skb)->nr_frags)
                subdesc_cnt += skb_shinfo(skb)->nr_frags;
  
@@@ -980,14 -971,21 +986,21 @@@ nicvf_sq_add_hdr_subdesc(struct nicvf *
        struct sq_hdr_subdesc *hdr;
  
        hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
-       sq->skbuff[qentry] = (u64)skb;
        memset(hdr, 0, SND_QUEUE_DESC_SIZE);
        hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
-       /* Enable notification via CQE after processing SQE */
-       hdr->post_cqe = 1;
-       /* No of subdescriptors following this */
-       hdr->subdesc_cnt = subdesc_cnt;
+       if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) {
+               /* post_cqe = 0, to avoid HW posting a CQE for every TSO
+                * segment transmitted on 88xx.
+                */
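+               /* subdesc_cnt includes the POST_CQE_DESC_COUNT dummy
+                * descriptors appended later (nicvf_sq_add_cqe_subdesc),
+                * so exclude them from this header's count.
+                */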
+               hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT;
+       } else {
+               sq->skbuff[qentry] = (u64)skb;
+               /* Enable notification via CQE after processing SQE */
+               hdr->post_cqe = 1;
+               /* No of subdescriptors following this */
+               hdr->subdesc_cnt = subdesc_cnt;
+       }
        hdr->tot_len = len;
  
        /* Offload checksum calculation to HW */
@@@ -1038,6 -1036,37 +1051,37 @@@ static inline void nicvf_sq_add_gather_
        gather->addr = data;
  }
  
+ /* Add HDR + IMMEDIATE subdescriptors right after the descriptors of a TSO
+  * packet so that a CQE is posted as a notification for transmission of the
+  * TSO packet.
+  */
+ static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
+                                           int tso_sqe, struct sk_buff *skb)
+ {
+       struct sq_imm_subdesc *imm;
+       struct sq_hdr_subdesc *hdr;
+       sq->skbuff[qentry] = (u64)skb;
+       hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
+       memset(hdr, 0, SND_QUEUE_DESC_SIZE);
+       hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
+       /* Enable notification via CQE after processing SQE */
+       hdr->post_cqe = 1;
+       /* There is no packet to transmit here */
+       hdr->dont_send = 1;
+       hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1;
+       hdr->tot_len = 1;
+       /* Actual TSO header SQE index, needed for cleanup */
+       hdr->rsvd2 = tso_sqe;
+       qentry = nicvf_get_nxt_sqentry(sq, qentry);
+       imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry);
+       memset(imm, 0, SND_QUEUE_DESC_SIZE);
+       imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE;
+       imm->len = 1;
+ }
+
  /* Segment a TSO packet into 'gso_size' segments and append
   * them to SQ for transfer
   */
@@@ -1111,7 -1140,7 +1155,7 @@@ static int nicvf_sq_append_tso(struct n
  int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
  {
        int i, size;
-       int subdesc_cnt;
+       int subdesc_cnt, tso_sqe = 0;
        int sq_num, qentry;
        struct queue_set *qs;
        struct snd_queue *sq;
        /* Add SQ header subdesc */
        nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
                                 skb, skb->len);
+       tso_sqe = qentry;
  
        /* Add SQ gather subdescs */
        qentry = nicvf_get_nxt_sqentry(sq, qentry);
        }
  
  doorbell:
+       if (nic->t88 && skb_shinfo(skb)->gso_size) {
+               qentry = nicvf_get_nxt_sqentry(sq, qentry);
+               nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb);
+       }
        /* make sure all memory stores are done before ringing doorbell */
        smp_wmb();
  
@@@ -1199,23 -1234,13 +1249,23 @@@ struct sk_buff *nicvf_get_rcv_skb(struc
        int frag;
        int payload_len = 0;
        struct sk_buff *skb = NULL;
 -      struct sk_buff *skb_frag = NULL;
 -      struct sk_buff *prev_frag = NULL;
 +      struct page *page;
 +      int offset;
        u16 *rb_lens = NULL;
        u64 *rb_ptrs = NULL;
  
        rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
 -      rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
 +      /* On all chips except 88xx pass1, CQE_RX2_S is added to
 +       * CQE_RX at word6, hence the buffer pointers move by one word.
 +       *
 +       * Use the existing 'hw_tso' flag, which is set for all chips
 +       * except 88xx pass1, instead of taking an additional cache line
 +       * access (or miss) by reading the pci dev's revision.
 +       */
 +      if (!nic->hw_tso)
 +              rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
 +      else
 +              rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
  
        netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
                   __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
                        skb_put(skb, payload_len);
                } else {
                        /* Add fragments */
 -                      skb_frag = nicvf_rb_ptr_to_skb(nic, *rb_ptrs,
 -                                                     payload_len);
 -                      if (!skb_frag) {
 -                              dev_kfree_skb(skb);
 -                              return NULL;
 -                      }
 -
 -                      if (!skb_shinfo(skb)->frag_list)
 -                              skb_shinfo(skb)->frag_list = skb_frag;
 -                      else
 -                              prev_frag->next = skb_frag;
 -
 -                      prev_frag = skb_frag;
 -                      skb->len += payload_len;
 -                      skb->data_len += payload_len;
 -                      skb_frag->len = payload_len;
 +                      page = virt_to_page(phys_to_virt(*rb_ptrs));
 +                      offset = phys_to_virt(*rb_ptrs) - page_address(page);
 +                      skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
 +                                      offset, payload_len, RCV_FRAG_LEN);
                }
                /* Next buffer pointer */
                rb_ptrs++;
@@@ -41,7 -41,7 +41,7 @@@ static const char i40e_driver_string[] 
  
  #define DRV_VERSION_MAJOR 1
  #define DRV_VERSION_MINOR 6
 -#define DRV_VERSION_BUILD 11
 +#define DRV_VERSION_BUILD 12
  #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@@ -527,7 -527,6 +527,7 @@@ void i40e_pf_reset_stats(struct i40e_p
                        pf->veb[i]->stat_offsets_loaded = false;
                }
        }
 +      pf->hw_csum_rx_error = 0;
  }
  
  /**
@@@ -4617,7 -4616,7 +4617,7 @@@ static u8 i40e_dcb_get_enabled_tc(struc
  static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
  {
        struct i40e_hw *hw = &pf->hw;
 -      u8 i, enabled_tc;
 +      u8 i, enabled_tc = 1;
        u8 num_tc = 0;
        struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
  
        else
                return 1; /* Only TC0 */
  
 -      /* At least have TC0 */
 -      enabled_tc = (enabled_tc ? enabled_tc : 0x1);
        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
                if (enabled_tc & BIT(i))
                        num_tc++;
@@@ -5112,9 -5113,13 +5112,13 @@@ static int i40e_init_pf_dcb(struct i40e
                                       DCB_CAP_DCBX_VER_IEEE;
  
                        pf->flags |= I40E_FLAG_DCB_CAPABLE;
-                       /* Enable DCB tagging only when more than one TC */
+                       /* Enable DCB tagging only when there is more than one TC,
+                        * or explicitly disable it if there is only one TC.
+                        */
                        if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
                                pf->flags |= I40E_FLAG_DCB_ENABLED;
+                       else
+                               pf->flags &= ~I40E_FLAG_DCB_ENABLED;
                        dev_dbg(&pf->pdev->dev,
                                "DCBX offload is supported for this PF.\n");
                }
@@@ -5715,7 -5720,7 +5719,7 @@@ static int i40e_handle_lldp_event(struc
        u8 type;
  
        /* Not DCB capable or capability disabled */
-       if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+       if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
                return ret;
  
        /* Ignore if event is not for Nearest Bridge */
@@@ -7895,6 -7900,7 +7899,7 @@@ static int i40e_init_interrupt_scheme(s
  #endif
                                       I40E_FLAG_RSS_ENABLED    |
                                       I40E_FLAG_DCB_CAPABLE    |
+                                      I40E_FLAG_DCB_ENABLED    |
                                       I40E_FLAG_SRIOV_ENABLED  |
                                       I40E_FLAG_FD_SB_ENABLED  |
                                       I40E_FLAG_FD_ATR_ENABLED |
@@@ -7984,34 -7990,72 +7989,34 @@@ static int i40e_setup_misc_vector(struc
  static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
                              u8 *lut, u16 lut_size)
  {
 -      struct i40e_aqc_get_set_rss_key_data rss_key;
        struct i40e_pf *pf = vsi->back;
        struct i40e_hw *hw = &pf->hw;
 -      bool pf_lut = false;
 -      u8 *rss_lut;
 -      int ret, i;
 -
 -      memcpy(&rss_key, seed, sizeof(rss_key));
 -
 -      rss_lut = kzalloc(pf->rss_table_size, GFP_KERNEL);
 -      if (!rss_lut)
 -              return -ENOMEM;
 -
 -      /* Populate the LUT with max no. of queues in round robin fashion */
 -      for (i = 0; i < vsi->rss_table_size; i++)
 -              rss_lut[i] = i % vsi->rss_size;
 +      int ret = 0;
  
 -      ret = i40e_aq_set_rss_key(hw, vsi->id, &rss_key);
 -      if (ret) {
 -              dev_info(&pf->pdev->dev,
 -                       "Cannot set RSS key, err %s aq_err %s\n",
 -                       i40e_stat_str(&pf->hw, ret),
 -                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
 -              goto config_rss_aq_out;
 +      if (seed) {
 +              struct i40e_aqc_get_set_rss_key_data *seed_dw =
 +                      (struct i40e_aqc_get_set_rss_key_data *)seed;
 +              ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
 +              if (ret) {
 +                      dev_info(&pf->pdev->dev,
 +                               "Cannot set RSS key, err %s aq_err %s\n",
 +                               i40e_stat_str(hw, ret),
 +                               i40e_aq_str(hw, hw->aq.asq_last_status));
 +                      return ret;
 +              }
        }
 +      if (lut) {
 +              bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
  
 -      if (vsi->type == I40E_VSI_MAIN)
 -              pf_lut = true;
 -
 -      ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, rss_lut,
 -                                vsi->rss_table_size);
 -      if (ret)
 -              dev_info(&pf->pdev->dev,
 -                       "Cannot set RSS lut, err %s aq_err %s\n",
 -                       i40e_stat_str(&pf->hw, ret),
 -                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
 -
 -config_rss_aq_out:
 -      kfree(rss_lut);
 -      return ret;
 -}
 -
 -/**
 - * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
 - * @vsi: VSI structure
 - **/
 -static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
 -{
 -      u8 seed[I40E_HKEY_ARRAY_SIZE];
 -      struct i40e_pf *pf = vsi->back;
 -      u8 *lut;
 -      int ret;
 -
 -      if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE))
 -              return 0;
 -
 -      lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
 -      if (!lut)
 -              return -ENOMEM;
 -
 -      i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
 -      netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
 -      vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs);
 -      ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
 -      kfree(lut);
 -
 +              ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
 +              if (ret) {
 +                      dev_info(&pf->pdev->dev,
 +                               "Cannot set RSS lut, err %s aq_err %s\n",
 +                               i40e_stat_str(hw, ret),
 +                               i40e_aq_str(hw, hw->aq.asq_last_status));
 +                      return ret;
 +              }
 +      }
        return ret;
  }
  
@@@ -8061,46 -8105,6 +8066,46 @@@ static int i40e_get_rss_aq(struct i40e_
        return ret;
  }
  
 +/**
 + * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
 + * @vsi: VSI structure
 + **/
 +static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
 +{
 +      u8 seed[I40E_HKEY_ARRAY_SIZE];
 +      struct i40e_pf *pf = vsi->back;
 +      u8 *lut;
 +      int ret;
 +
 +      if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE))
 +              return 0;
 +
 +      if (!vsi->rss_size)
 +              vsi->rss_size = min_t(int, pf->alloc_rss_size,
 +                                    vsi->num_queue_pairs);
 +      if (!vsi->rss_size)
 +              return -EINVAL;
 +
 +      lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
 +      if (!lut)
 +              return -ENOMEM;
 +      /* Use the user configured hash key and lookup table if there is one,
 +       * otherwise use the defaults
 +       */
 +      if (vsi->rss_lut_user)
 +              memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
 +      else
 +              i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
 +      if (vsi->rss_hkey_user)
 +              memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
 +      else
 +              netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
 +      ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
 +      kfree(lut);
 +
 +      return ret;
 +}
 +
  /**
   * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
   * @vsi: Pointer to vsi structure
@@@ -8691,28 -8695,6 +8696,28 @@@ bool i40e_set_ntuple(struct i40e_pf *pf
        return need_reset;
  }
  
 +/**
 + * i40e_clear_rss_lut - clear the rx hash lookup table
 + * @vsi: the VSI being configured
 + **/
 +static void i40e_clear_rss_lut(struct i40e_vsi *vsi)
 +{
 +      struct i40e_pf *pf = vsi->back;
 +      struct i40e_hw *hw = &pf->hw;
 +      u16 vf_id = vsi->vf_id;
 +      u8 i;
 +
 +      if (vsi->type == I40E_VSI_MAIN) {
 +              for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
 +                      wr32(hw, I40E_PFQF_HLUT(i), 0);
 +      } else if (vsi->type == I40E_VSI_SRIOV) {
 +              for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
 +                      i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0);
 +      } else {
 +              dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
 +      }
 +}
 +
  /**
   * i40e_set_features - set the netdev feature flags
   * @netdev: ptr to the netdev being adjusted
@@@ -8726,12 -8708,6 +8731,12 @@@ static int i40e_set_features(struct net
        struct i40e_pf *pf = vsi->back;
        bool need_reset;
  
 +      if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
 +              i40e_pf_config_rss(pf);
 +      else if (!(features & NETIF_F_RXHASH) &&
 +               netdev->features & NETIF_F_RXHASH)
 +              i40e_clear_rss_lut(vsi);
 +
        if (features & NETIF_F_HW_VLAN_CTAG_RX)
                i40e_vlan_stripping_enable(vsi);
        else
@@@ -10531,6 -10507,7 +10536,7 @@@ static void i40e_determine_queue_usage(
                               I40E_FLAG_FD_SB_ENABLED  |
                               I40E_FLAG_FD_ATR_ENABLED |
                               I40E_FLAG_DCB_CAPABLE    |
+                              I40E_FLAG_DCB_ENABLED    |
                               I40E_FLAG_SRIOV_ENABLED  |
                               I40E_FLAG_VMDQ_ENABLED);
        } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
                /* Not enough queues for all TCs */
                if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
                    (queues_left < I40E_MAX_TRAFFIC_CLASS)) {
-                       pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+                       pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
+                                       I40E_FLAG_DCB_ENABLED);
                        dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
                }
                pf->num_lan_qps = max_t(int, pf->rss_size_max,
@@@ -10951,7 -10929,7 +10958,7 @@@ static int i40e_probe(struct pci_dev *p
        err = i40e_init_pf_dcb(pf);
        if (err) {
                dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err);
-               pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+               pf->flags &= ~(I40E_FLAG_DCB_CAPABLE & I40E_FLAG_DCB_ENABLED);
                /* Continue without DCB enabled */
        }
  #endif /* CONFIG_I40E_DCB */
@@@ -11604,8 -11582,7 +11611,8 @@@ static int __init i40e_init_module(void
         * it can't be any worse than using the system workqueue which
         * was already single threaded
         */
 -      i40e_wq = create_singlethread_workqueue(i40e_driver_name);
 +      i40e_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
 +                                i40e_driver_name);
        if (!i40e_wq) {
                pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
                return -ENOMEM;
@@@ -50,6 -50,10 +50,10 @@@ static const struct mtk_ethtool_stats 
        MTK_ETHTOOL_STAT(rx_flow_control_packets),
  };
  
+ static const char * const mtk_clks_source_name[] = {
+       "ethif", "esw", "gp1", "gp2"
+ };
  void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
  {
        __raw_writel(val, eth->base + reg);
@@@ -291,7 -295,7 +295,7 @@@ err_phy
  static int mtk_mdio_init(struct mtk_eth *eth)
  {
        struct device_node *mii_np;
-       int err;
+       int ret;
  
        mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus");
        if (!mii_np) {
        }
  
        if (!of_device_is_available(mii_np)) {
-               err = 0;
+               ret = -ENODEV;
                goto err_put_node;
        }
  
-       eth->mii_bus = mdiobus_alloc();
+       eth->mii_bus = devm_mdiobus_alloc(eth->dev);
        if (!eth->mii_bus) {
-               err = -ENOMEM;
+               ret = -ENOMEM;
                goto err_put_node;
        }
  
        eth->mii_bus->parent = eth->dev;
  
        snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%s", mii_np->name);
-       err = of_mdiobus_register(eth->mii_bus, mii_np);
-       if (err)
-               goto err_free_bus;
-       return 0;
- err_free_bus:
-       mdiobus_free(eth->mii_bus);
+       ret = of_mdiobus_register(eth->mii_bus, mii_np);
  
  err_put_node:
        of_node_put(mii_np);
-       eth->mii_bus = NULL;
-       return err;
+       return ret;
  }
  
  static void mtk_mdio_cleanup(struct mtk_eth *eth)
                return;
  
        mdiobus_unregister(eth->mii_bus);
-       of_node_put(eth->mii_bus->dev.of_node);
-       mdiobus_free(eth->mii_bus);
  }
  
 -static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
 +static inline void mtk_irq_disable(struct mtk_eth *eth,
 +                                 unsigned reg, u32 mask)
  {
        unsigned long flags;
        u32 val;
  
        spin_lock_irqsave(&eth->irq_lock, flags);
 -      val = mtk_r32(eth, MTK_QDMA_INT_MASK);
 -      mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
 +      val = mtk_r32(eth, reg);
 +      mtk_w32(eth, val & ~mask, reg);
        spin_unlock_irqrestore(&eth->irq_lock, flags);
  }
  
 -static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask)
 +static inline void mtk_irq_enable(struct mtk_eth *eth,
 +                                unsigned reg, u32 mask)
  {
        unsigned long flags;
        u32 val;
  
        spin_lock_irqsave(&eth->irq_lock, flags);
 -      val = mtk_r32(eth, MTK_QDMA_INT_MASK);
 -      mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
 +      val = mtk_r32(eth, reg);
 +      mtk_w32(eth, val | mask, reg);
        spin_unlock_irqrestore(&eth->irq_lock, flags);
  }
  
@@@ -371,17 -363,18 +365,17 @@@ static int mtk_set_mac_address(struct n
        int ret = eth_mac_addr(dev, p);
        struct mtk_mac *mac = netdev_priv(dev);
        const char *macaddr = dev->dev_addr;
 -      unsigned long flags;
  
        if (ret)
                return ret;
  
 -      spin_lock_irqsave(&mac->hw->page_lock, flags);
 +      spin_lock_bh(&mac->hw->page_lock);
        mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
                MTK_GDMA_MAC_ADRH(mac->id));
        mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
                (macaddr[4] << 8) | macaddr[5],
                MTK_GDMA_MAC_ADRL(mac->id));
 -      spin_unlock_irqrestore(&mac->hw->page_lock, flags);
 +      spin_unlock_bh(&mac->hw->page_lock);
  
        return 0;
  }
@@@ -589,14 -582,15 +583,15 @@@ static int mtk_tx_map(struct sk_buff *s
        dma_addr_t mapped_addr;
        unsigned int nr_frags;
        int i, n_desc = 1;
-       u32 txd4 = 0;
+       u32 txd4 = 0, fport;
  
        itxd = ring->next_free;
        if (itxd == ring->last_free)
                return -ENOMEM;
  
        /* set the forward port */
-       txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT;
+       fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT;
+       txd4 |= fport;
  
        tx_buf = mtk_desc_to_tx_buf(ring, itxd);
        memset(tx_buf, 0, sizeof(*tx_buf));
                        WRITE_ONCE(txd->txd3, (TX_DMA_SWC |
                                               TX_DMA_PLEN0(frag_map_size) |
                                               last_frag * TX_DMA_LS0));
-                       WRITE_ONCE(txd->txd4, 0);
+                       WRITE_ONCE(txd->txd4, fport);
  
                        tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
                        tx_buf = mtk_desc_to_tx_buf(ring, txd);
@@@ -765,6 -759,7 +760,6 @@@ static int mtk_start_xmit(struct sk_buf
        struct mtk_eth *eth = mac->hw;
        struct mtk_tx_ring *ring = &eth->tx_ring;
        struct net_device_stats *stats = &dev->stats;
 -      unsigned long flags;
        bool gso = false;
        int tx_num;
  
         * however we have 2 queues running on the same ring so we need to lock
         * the ring access
         */
 -      spin_lock_irqsave(&eth->page_lock, flags);
 +      spin_lock(&eth->page_lock);
  
        tx_num = mtk_cal_txd_req(skb);
        if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
                mtk_stop_queue(eth);
                netif_err(eth, tx_queued, dev,
                          "Tx Ring full when queue awake!\n");
 -              spin_unlock_irqrestore(&eth->page_lock, flags);
 +              spin_unlock(&eth->page_lock);
                return NETDEV_TX_BUSY;
        }
  
        if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
                mtk_stop_queue(eth);
  
 -      spin_unlock_irqrestore(&eth->page_lock, flags);
 +      spin_unlock(&eth->page_lock);
  
        return NETDEV_TX_OK;
  
  drop:
 -      spin_unlock_irqrestore(&eth->page_lock, flags);
 +      spin_unlock(&eth->page_lock);
        stats->tx_dropped++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
@@@ -865,7 -860,7 +860,7 @@@ static int mtk_poll_rx(struct napi_stru
                /* receive data */
                skb = build_skb(data, ring->frag_size);
                if (unlikely(!skb)) {
-                       put_page(virt_to_head_page(new_data));
+                       skb_free_frag(new_data);
                        netdev->stats.rx_dropped++;
                        goto release_desc;
                }
@@@ -895,18 -890,17 +890,18 @@@ release_desc
                rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
  
                ring->calc_idx = idx;
 +
 +              done++;
 +      }
 +
 +      if (done) {
                /* make sure that all changes to the dma ring are flushed before
                 * we continue
                 */
                wmb();
 -              mtk_w32(eth, ring->calc_idx, MTK_QRX_CRX_IDX0);
 -              done++;
 +              mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0);
        }
  
 -      if (done < budget)
 -              mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
 -
        return done;
  }
  
@@@ -1015,7 -1009,7 +1010,7 @@@ static int mtk_napi_tx(struct napi_stru
                return budget;
  
        napi_complete(napi);
 -      mtk_irq_enable(eth, MTK_TX_DONE_INT);
 +      mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
  
        return tx_done;
  }
@@@ -1025,33 -1019,30 +1020,33 @@@ static int mtk_napi_rx(struct napi_stru
        struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
        u32 status, mask;
        int rx_done = 0;
 +      int remain_budget = budget;
  
        mtk_handle_status_irq(eth);
 -      mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
 -      rx_done = mtk_poll_rx(napi, budget, eth);
 +
 +poll_again:
 +      mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
 +      rx_done = mtk_poll_rx(napi, remain_budget, eth);
  
        if (unlikely(netif_msg_intr(eth))) {
 -              status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
 -              mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
 +              status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
 +              mask = mtk_r32(eth, MTK_PDMA_INT_MASK);
                dev_info(eth->dev,
                         "done rx %d, intr 0x%08x/0x%x\n",
                         rx_done, status, mask);
        }
 -
 -      if (rx_done == budget)
 -              return budget;
 -
 -      status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
 -      if (status & MTK_RX_DONE_INT)
 +      if (rx_done == remain_budget)
                return budget;
  
 +      status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
 +      if (status & MTK_RX_DONE_INT) {
 +              remain_budget -= rx_done;
 +              goto poll_again;
 +      }
        napi_complete(napi);
 -      mtk_irq_enable(eth, MTK_RX_DONE_INT);
 +      mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
  
 -      return rx_done;
 +      return rx_done + budget - remain_budget;
  }
  
  static int mtk_tx_alloc(struct mtk_eth *eth)
        mtk_w32(eth,
                ring->phys + ((MTK_DMA_SIZE - 1) * sz),
                MTK_QTX_DRX_PTR);
 +      mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
  
        return 0;
  
@@@ -1169,10 -1159,11 +1164,10 @@@ static int mtk_rx_alloc(struct mtk_eth 
         */
        wmb();
  
 -      mtk_w32(eth, eth->rx_ring.phys, MTK_QRX_BASE_PTR0);
 -      mtk_w32(eth, MTK_DMA_SIZE, MTK_QRX_MAX_CNT0);
 -      mtk_w32(eth, eth->rx_ring.calc_idx, MTK_QRX_CRX_IDX0);
 -      mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX);
 -      mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
 +      mtk_w32(eth, eth->rx_ring.phys, MTK_PRX_BASE_PTR0);
 +      mtk_w32(eth, MTK_DMA_SIZE, MTK_PRX_MAX_CNT0);
 +      mtk_w32(eth, eth->rx_ring.calc_idx, MTK_PRX_CRX_IDX0);
 +      mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_PDMA_RST_IDX);
  
        return 0;
  }
@@@ -1291,7 -1282,7 +1286,7 @@@ static irqreturn_t mtk_handle_irq_rx(in
  
        if (likely(napi_schedule_prep(&eth->rx_napi))) {
                __napi_schedule(&eth->rx_napi);
 -              mtk_irq_disable(eth, MTK_RX_DONE_INT);
 +              mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
        }
  
        return IRQ_HANDLED;
@@@ -1303,7 -1294,7 +1298,7 @@@ static irqreturn_t mtk_handle_irq_tx(in
  
        if (likely(napi_schedule_prep(&eth->tx_napi))) {
                __napi_schedule(&eth->tx_napi);
 -              mtk_irq_disable(eth, MTK_TX_DONE_INT);
 +              mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
        }
  
        return IRQ_HANDLED;
@@@ -1314,12 -1305,11 +1309,12 @@@ static void mtk_poll_controller(struct 
  {
        struct mtk_mac *mac = netdev_priv(dev);
        struct mtk_eth *eth = mac->hw;
 -      u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT;
  
 -      mtk_irq_disable(eth, int_mask);
 +      mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
 +      mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
        mtk_handle_irq_rx(eth->irq[2], dev);
 -      mtk_irq_enable(eth, int_mask);
 +      mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
 +      mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
  }
  #endif
  
@@@ -1334,15 -1324,11 +1329,15 @@@ static int mtk_start_dma(struct mtk_et
        }
  
        mtk_w32(eth,
 -              MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
 -              MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
 -              MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
 +              MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
 +              MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO,
                MTK_QDMA_GLO_CFG);
  
 +      mtk_w32(eth,
 +              MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
 +              MTK_RX_BT_32DWORDS | MTK_MULTI_EN,
 +              MTK_PDMA_GLO_CFG);
 +
        return 0;
  }
  
@@@ -1360,8 -1346,7 +1355,8 @@@ static int mtk_open(struct net_device *
  
                napi_enable(&eth->tx_napi);
                napi_enable(&eth->rx_napi);
 -              mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
 +              mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
 +              mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
        }
        atomic_inc(&eth->dma_refcnt);
  
  
  static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg)
  {
 -      unsigned long flags;
        u32 val;
        int i;
  
        /* stop the dma engine */
 -      spin_lock_irqsave(&eth->page_lock, flags);
 +      spin_lock_bh(&eth->page_lock);
        val = mtk_r32(eth, glo_cfg);
        mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN),
                glo_cfg);
 -      spin_unlock_irqrestore(&eth->page_lock, flags);
 +      spin_unlock_bh(&eth->page_lock);
  
        /* wait for dma stop */
        for (i = 0; i < 10; i++) {
@@@ -1406,8 -1392,7 +1401,8 @@@ static int mtk_stop(struct net_device *
        if (!atomic_dec_and_test(&eth->dma_refcnt))
                return 0;
  
 -      mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
 +      mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
 +      mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
        napi_disable(&eth->tx_napi);
        napi_disable(&eth->rx_napi);
  
@@@ -1461,9 -1446,7 +1456,9 @@@ static int __init mtk_hw_init(struct mt
  
        /* disable delay and normal interrupt */
        mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
 -      mtk_irq_disable(eth, ~0);
 +      mtk_w32(eth, 0, MTK_PDMA_DELAY_INT);
 +      mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
 +      mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
        mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
        mtk_w32(eth, 0, MTK_RST_GL);
  
        for (i = 0; i < 2; i++) {
                u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
  
 -              /* setup the forward port to send frame to QDMA */
 +              /* setup the forward port to send frame to PDMA */
                val &= ~0xffff;
 -              val |= 0x5555;
  
                /* Enable RX checksum */
                val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN;
@@@ -1517,11 -1501,7 +1512,11 @@@ static void mtk_uninit(struct net_devic
        struct mtk_eth *eth = mac->hw;
  
        phy_disconnect(mac->phy_dev);
 -      mtk_irq_disable(eth, ~0);
 +      mtk_mdio_cleanup(eth);
 +      mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
 +      mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
 +      free_irq(eth->irq[1], dev);
 +      free_irq(eth->irq[2], dev);
  }
  
  static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@@ -1698,7 -1678,7 +1693,7 @@@ static void mtk_get_ethtool_stats(struc
        }
  
        do {
 -              data_src = (u64*)hwstats;
 +              data_src = (u64 *)hwstats;
                data_dst = data;
                start = u64_stats_fetch_begin_irq(&hwstats->syncp);
  
        } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start));
  }
  
 -static struct ethtool_ops mtk_ethtool_ops = {
 +static const struct ethtool_ops mtk_ethtool_ops = {
        .get_settings           = mtk_get_settings,
        .set_settings           = mtk_set_settings,
        .get_drvinfo            = mtk_get_drvinfo,
@@@ -1825,6 -1805,7 +1820,7 @@@ static int mtk_probe(struct platform_de
        if (!eth)
                return -ENOMEM;
  
+       eth->dev = &pdev->dev;
        eth->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(eth->base))
                return PTR_ERR(eth->base);
                        return -ENXIO;
                }
        }
+       for (i = 0; i < ARRAY_SIZE(eth->clks); i++) {
+               eth->clks[i] = devm_clk_get(eth->dev,
+                                           mtk_clks_source_name[i]);
+               if (IS_ERR(eth->clks[i])) {
+                       if (PTR_ERR(eth->clks[i]) == -EPROBE_DEFER)
+                               return -EPROBE_DEFER;
+                       return -ENODEV;
+               }
+       }
  
-       eth->clk_ethif = devm_clk_get(&pdev->dev, "ethif");
-       eth->clk_esw = devm_clk_get(&pdev->dev, "esw");
-       eth->clk_gp1 = devm_clk_get(&pdev->dev, "gp1");
-       eth->clk_gp2 = devm_clk_get(&pdev->dev, "gp2");
-       if (IS_ERR(eth->clk_esw) || IS_ERR(eth->clk_gp1) ||
-           IS_ERR(eth->clk_gp2) || IS_ERR(eth->clk_ethif))
-               return -ENODEV;
-       clk_prepare_enable(eth->clk_ethif);
-       clk_prepare_enable(eth->clk_esw);
-       clk_prepare_enable(eth->clk_gp1);
-       clk_prepare_enable(eth->clk_gp2);
+       clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]);
+       clk_prepare_enable(eth->clks[MTK_CLK_ESW]);
+       clk_prepare_enable(eth->clks[MTK_CLK_GP1]);
+       clk_prepare_enable(eth->clks[MTK_CLK_GP2]);
  
-       eth->dev = &pdev->dev;
        eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE);
        INIT_WORK(&eth->pending_work, mtk_pending_work);
  
@@@ -1915,15 -1896,25 +1911,23 @@@ err_free_dev
  static int mtk_remove(struct platform_device *pdev)
  {
        struct mtk_eth *eth = platform_get_drvdata(pdev);
+       int i;
+       /* stop all devices to make sure that dma is properly shut down */
+       for (i = 0; i < MTK_MAC_COUNT; i++) {
+               if (!eth->netdev[i])
+                       continue;
+               mtk_stop(eth->netdev[i]);
+       }
  
-       clk_disable_unprepare(eth->clk_ethif);
-       clk_disable_unprepare(eth->clk_esw);
-       clk_disable_unprepare(eth->clk_gp1);
-       clk_disable_unprepare(eth->clk_gp2);
+       clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]);
+       clk_disable_unprepare(eth->clks[MTK_CLK_ESW]);
+       clk_disable_unprepare(eth->clks[MTK_CLK_GP1]);
+       clk_disable_unprepare(eth->clks[MTK_CLK_GP2]);
  
        netif_napi_del(&eth->tx_napi);
        netif_napi_del(&eth->rx_napi);
        mtk_cleanup(eth);
 -      mtk_mdio_cleanup(eth);
 -      platform_set_drvdata(pdev, NULL);
  
        return 0;
  }
  /* Unicast Filter MAC Address Register - High */
  #define MTK_GDMA_MAC_ADRH(x)  (0x50C + (x * 0x1000))
  
 +/* PDMA RX Base Pointer Register */
 +#define MTK_PRX_BASE_PTR0     0x900
 +
 +/* PDMA RX Maximum Count Register */
 +#define MTK_PRX_MAX_CNT0      0x904
 +
 +/* PDMA RX CPU Pointer Register */
 +#define MTK_PRX_CRX_IDX0      0x908
 +
 +/* PDMA Global Configuration Register */
 +#define MTK_PDMA_GLO_CFG      0xa04
 +#define MTK_MULTI_EN          BIT(10)
 +
 +/* PDMA Reset Index Register */
 +#define MTK_PDMA_RST_IDX      0xa08
 +#define MTK_PST_DRX_IDX0      BIT(16)
 +
 +/* PDMA Delay Interrupt Register */
 +#define MTK_PDMA_DELAY_INT    0xa0c
 +
 +/* PDMA Interrupt Status Register */
 +#define MTK_PDMA_INT_STATUS   0xa20
 +
 +/* PDMA Interrupt Mask Register */
 +#define MTK_PDMA_INT_MASK     0xa28
 +
  /* PDMA Interrupt grouping registers */
  #define MTK_PDMA_INT_GRP1     0xa50
  #define MTK_PDMA_INT_GRP2     0xa54
  
  /* QDMA Interrupt Status Register */
  #define MTK_QMTK_INT_STATUS   0x1A18
 +#define MTK_RX_DONE_INT3      BIT(19)
 +#define MTK_RX_DONE_INT2      BIT(18)
  #define MTK_RX_DONE_INT1      BIT(17)
  #define MTK_RX_DONE_INT0      BIT(16)
  #define MTK_TX_DONE_INT3      BIT(3)
  #define MTK_TX_DONE_INT2      BIT(2)
  #define MTK_TX_DONE_INT1      BIT(1)
  #define MTK_TX_DONE_INT0      BIT(0)
 -#define MTK_RX_DONE_INT               (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1)
 +#define MTK_RX_DONE_INT               (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1 | \
 +                               MTK_RX_DONE_INT2 | MTK_RX_DONE_INT3)
  #define MTK_TX_DONE_INT               (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
                                 MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
  
@@@ -319,6 -290,17 +319,17 @@@ enum mtk_tx_flags 
        MTK_TX_FLAGS_PAGE0      = 0x02,
  };
  
+ /* This enum maps each clock to its position in the clks array,
+  * in the order the clocks are defined
+  */
+ enum mtk_clks_map {
+       MTK_CLK_ETHIF,
+       MTK_CLK_ESW,
+       MTK_CLK_GP1,
+       MTK_CLK_GP2,
+       MTK_CLK_MAX
+ };
  /* struct mtk_tx_buf -        This struct holds the pointers to the memory pointed at
   *                    by the TX descriptors
   * @skb:              The SKB pointer of the packet being sent
@@@ -399,10 -381,7 +410,7 @@@ struct mtk_rx_ring 
   * @scratch_ring:     Newer SoCs need memory for a second HW managed TX ring
   * @phy_scratch_ring: physical address of scratch_ring
   * @scratch_head:     The scratch memory that scratch_ring points to.
-  * @clk_ethif:                The ethif clock
-  * @clk_esw:          The switch clock
-  * @clk_gp1:          The gmac1 clock
-  * @clk_gp2:          The gmac2 clock
+  * @clks:             clock array for all clocks required
   * @mii_bus:          If there is a bus we need to create an instance for it
   * @pending_work:     The workqueue used to reset the dma ring
   */
@@@ -429,10 -408,8 +437,8 @@@ struct mtk_eth 
        struct mtk_tx_dma               *scratch_ring;
        dma_addr_t                      phy_scratch_ring;
        void                            *scratch_head;
-       struct clk                      *clk_ethif;
-       struct clk                      *clk_esw;
-       struct clk                      *clk_gp1;
-       struct clk                      *clk_gp2;
+       struct clk                      *clks[MTK_CLK_MAX];
        struct mii_bus                  *mii_bus;
        struct work_struct              pending_work;
  };
@@@ -71,10 -71,11 +71,11 @@@ int mlx4_en_setup_tc(struct net_device 
  #ifdef CONFIG_MLX4_EN_DCB
        if (!mlx4_is_slave(priv->mdev->dev)) {
                if (up) {
-                       priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
+                       if (priv->dcbx_cap)
+                               priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
                } else {
                        priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
-                       priv->cee_params.dcb_cfg.pfc_state = false;
+                       priv->cee_config.pfc_state = false;
                }
        }
  #endif /* CONFIG_MLX4_EN_DCB */
@@@ -2642,16 -2643,12 +2643,16 @@@ static int mlx4_xdp_set(struct net_devi
                        if (IS_ERR(prog))
                                return PTR_ERR(prog);
                }
 +              mutex_lock(&mdev->state_lock);
                for (i = 0; i < priv->rx_ring_num; i++) {
 -                      /* This xchg is paired with READ_ONCE in the fastpath */
 -                      old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
 +                      old_prog = rcu_dereference_protected(
 +                                      priv->rx_ring[i]->xdp_prog,
 +                                      lockdep_is_held(&mdev->state_lock));
 +                      rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
                        if (old_prog)
                                bpf_prog_put(old_prog);
                }
 +              mutex_unlock(&mdev->state_lock);
                return 0;
        }
  
                                                        priv->xdp_ring_num);
  
        for (i = 0; i < priv->rx_ring_num; i++) {
 -              old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
 +              old_prog = rcu_dereference_protected(
 +                                      priv->rx_ring[i]->xdp_prog,
 +                                      lockdep_is_held(&mdev->state_lock));
 +              rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
                if (old_prog)
                        bpf_prog_put(old_prog);
        }
@@@ -3055,9 -3049,6 +3056,6 @@@ int mlx4_en_init_netdev(struct mlx4_en_
        struct mlx4_en_priv *priv;
        int i;
        int err;
- #ifdef CONFIG_MLX4_EN_DCB
-       struct tc_configuration *tc;
- #endif
  
        dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv),
                                 MAX_TX_RINGS, MAX_RX_RINGS);
        priv->msg_enable = MLX4_EN_MSG_LEVEL;
  #ifdef CONFIG_MLX4_EN_DCB
        if (!mlx4_is_slave(priv->mdev->dev)) {
-               priv->cee_params.dcbx_cap = DCB_CAP_DCBX_VER_CEE |
-                                           DCB_CAP_DCBX_HOST |
-                                           DCB_CAP_DCBX_VER_IEEE;
+               priv->dcbx_cap = DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_HOST |
+                       DCB_CAP_DCBX_VER_IEEE;
                priv->flags |= MLX4_EN_DCB_ENABLED;
-               priv->cee_params.dcb_cfg.pfc_state = false;
+               priv->cee_config.pfc_state = false;
  
-               for (i = 0; i < MLX4_EN_NUM_UP; i++) {
-                       tc = &priv->cee_params.dcb_cfg.tc_config[i];
-                       tc->dcb_pfc = pfc_disabled;
-               }
+               for (i = 0; i < MLX4_EN_NUM_UP; i++)
+                       priv->cee_config.dcb_pfc[i] = pfc_disabled;
  
                if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
                        dev->dcbnl_ops = &mlx4_en_dcbnl_ops;
@@@ -340,7 -340,7 +340,7 @@@ struct mlx4_en_rx_ring 
        u8  fcs_del;
        void *buf;
        void *rx_info;
 -      struct bpf_prog *xdp_prog;
 +      struct bpf_prog __rcu *xdp_prog;
        struct mlx4_en_page_cache page_cache;
        unsigned long bytes;
        unsigned long packets;
@@@ -482,20 -482,10 +482,10 @@@ enum dcb_pfc_type 
        pfc_enabled_rx
  };
  
- struct tc_configuration {
-       enum dcb_pfc_type  dcb_pfc;
- };
  struct mlx4_en_cee_config {
        bool    pfc_state;
-       struct  tc_configuration tc_config[MLX4_EN_NUM_UP];
+       enum    dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP];
  };
- struct mlx4_en_cee_params {
-       u8 dcbx_cap;
-       struct mlx4_en_cee_config dcb_cfg;
- };
  #endif
  
  struct ethtool_flow_id {
@@@ -624,7 -614,8 +614,8 @@@ struct mlx4_en_priv 
        struct ieee_ets ets;
        u16 maxrate[IEEE_8021QAZ_MAX_TCS];
        enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS];
-       struct mlx4_en_cee_params cee_params;
+       struct mlx4_en_cee_config cee_config;
+       u8 dcbx_cap;
  #endif
  #ifdef CONFIG_RFS_ACCEL
        spinlock_t filters_lock;
@@@ -331,7 -331,7 +331,7 @@@ static void mlx5e_get_ethtool_stats(str
        if (mlx5e_query_global_pause_combined(priv)) {
                for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
                        data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
-                                                         pport_per_prio_pfc_stats_desc, 0);
+                                                         pport_per_prio_pfc_stats_desc, i);
                }
        }
  
@@@ -659,9 -659,10 +659,10 @@@ out
  static void ptys2ethtool_supported_link(unsigned long *supported_modes,
                                        u32 eth_proto_cap)
  {
+       unsigned long proto_cap = eth_proto_cap;
        int proto;
  
-       for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER)
+       for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
                bitmap_or(supported_modes, supported_modes,
                          ptys2ethtool_table[proto].supported,
                          __ETHTOOL_LINK_MODE_MASK_NBITS);
  static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
                                    u32 eth_proto_cap)
  {
+       unsigned long proto_cap = eth_proto_cap;
        int proto;
  
-       for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER)
+       for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
                bitmap_or(advertising_modes, advertising_modes,
                          ptys2ethtool_table[proto].advertised,
                          __ETHTOOL_LINK_MODE_MASK_NBITS);
@@@ -803,7 -805,7 +805,7 @@@ static int mlx5e_get_link_ksettings(str
  {
        struct mlx5e_priv *priv    = netdev_priv(netdev);
        struct mlx5_core_dev *mdev = priv->mdev;
 -      u32 out[MLX5_ST_SZ_DW(ptys_reg)];
 +      u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
        u32 eth_proto_cap;
        u32 eth_proto_admin;
        u32 eth_proto_lp;
        int err;
  
        err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
 -
        if (err) {
                netdev_err(netdev, "%s: query port ptys failed: %d\n",
                           __func__, err);
@@@ -56,6 -56,7 +56,7 @@@
  #include <generated/utsrelease.h>
  #include <net/pkt_cls.h>
  #include <net/tc_act/tc_mirred.h>
+ #include <net/netevent.h>
  
  #include "spectrum.h"
  #include "core.h"
@@@ -555,9 -556,8 +556,9 @@@ int mlxsw_sp_port_vid_to_fid_set(struc
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl);
  }
  
 -static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
 -                                        u16 vid, bool learn_enable)
 +int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
 +                                   u16 vid_begin, u16 vid_end,
 +                                   bool learn_enable)
  {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        char *spvmlr_pl;
        spvmlr_pl = kmalloc(MLXSW_REG_SPVMLR_LEN, GFP_KERNEL);
        if (!spvmlr_pl)
                return -ENOMEM;
 -      mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid, vid,
 -                            learn_enable);
 +      mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid_begin,
 +                            vid_end, learn_enable);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvmlr), spvmlr_pl);
        kfree(spvmlr_pl);
        return err;
  }
  
 +static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
 +                                        u16 vid, bool learn_enable)
 +{
 +      return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid,
 +                                              learn_enable);
 +}
 +
  static int
  mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port)
  {
@@@ -981,6 -974,10 +982,6 @@@ static int mlxsw_sp_port_add_vid(struc
                        goto err_port_vp_mode_trans;
        }
  
 -      err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
 -      if (err)
 -              goto err_port_vid_learning_set;
 -
        err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged);
        if (err)
                goto err_port_add_vid;
        return 0;
  
  err_port_add_vid:
 -      mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
 -err_port_vid_learning_set:
        if (list_is_singular(&mlxsw_sp_port->vports_list))
                mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
  err_port_vp_mode_trans:
@@@ -1014,6 -1013,8 +1015,6 @@@ static int mlxsw_sp_port_kill_vid(struc
  
        mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
  
 -      mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
 -
        /* Drop FID reference. If this was the last reference the
         * resources will be freed.
         */
@@@ -2105,6 -2106,13 +2106,13 @@@ static int mlxsw_sp_port_create(struct 
        dev->netdev_ops = &mlxsw_sp_port_netdev_ops;
        dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops;
  
+       err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n",
+                       mlxsw_sp_port->local_port);
+               goto err_port_swid_set;
+       }
        err = mlxsw_sp_port_dev_addr_init(mlxsw_sp_port);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unable to init port mac address\n",
                goto err_port_system_port_mapping_set;
        }
  
-       err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0);
-       if (err) {
-               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n",
-                       mlxsw_sp_port->local_port);
-               goto err_port_swid_set;
-       }
        err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n",
@@@ -2218,10 -2219,10 +2219,10 @@@ err_port_buffers_init
  err_port_admin_status_set:
  err_port_mtu_set:
  err_port_speed_by_width_set:
-       mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
- err_port_swid_set:
  err_port_system_port_mapping_set:
  err_dev_addr_init:
+       mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
+ err_port_swid_set:
        free_percpu(mlxsw_sp_port->pcpu_stats);
  err_alloc_stats:
        kfree(mlxsw_sp_port->untagged_vlans);
@@@ -2570,47 -2571,123 +2571,47 @@@ static void mlxsw_sp_rx_listener_func(s
        netif_receive_skb(skb);
  }
  
 +static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
 +                                         void *priv)
 +{
 +      skb->offload_fwd_mark = 1;
 +      return mlxsw_sp_rx_listener_func(skb, local_port, priv);
 +}
 +
 +#define MLXSW_SP_RXL(_func, _trap_id, _action)                        \
 +      {                                                       \
 +              .func = _func,                                  \
 +              .local_port = MLXSW_PORT_DONT_CARE,             \
 +              .trap_id = MLXSW_TRAP_ID_##_trap_id,            \
 +              .action = MLXSW_REG_HPKT_ACTION_##_action,      \
 +      }
 +
  static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_FDB_MC,
 -      },
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC, TRAP_TO_CPU),
        /* Traps for specific L2 packet types, not trapped as FDB MC */
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_STP,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_LACP,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_EAPOL,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_LLDP,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_MMRP,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_MVRP,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_RPVST,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_DHCP,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_IGMP_QUERY,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_ARPBC,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_ARPUC,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_MTUERROR,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_TTLERROR,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_LBERROR,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_OSPF,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_IP2ME,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_RTR_INGRESS0,
 -      },
 -      {
 -              .func = mlxsw_sp_rx_listener_func,
 -              .local_port = MLXSW_PORT_DONT_CARE,
 -              .trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4,
 -      },
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, DHCP, MIRROR_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, IGMP_QUERY, MIRROR_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPBC, MIRROR_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPUC, MIRROR_TO_CPU),
 +      /* L3 traps */
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, OSPF, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0, TRAP_TO_CPU),
 +      MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4, TRAP_TO_CPU),
  };
  
  static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
                if (err)
                        goto err_rx_listener_register;
  
 -              mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
 +              mlxsw_reg_hpkt_pack(hpkt_pl, mlxsw_sp_rx_listener[i].action,
                                    mlxsw_sp_rx_listener[i].trap_id);
                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
                if (err)
@@@ -4465,18 -4542,26 +4466,26 @@@ static struct notifier_block mlxsw_sp_i
        .priority = 10, /* Must be called before FIB notifier block */
  };
  
+ static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
+       .notifier_call = mlxsw_sp_router_netevent_event,
+ };
  static int __init mlxsw_sp_module_init(void)
  {
        int err;
  
        register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
        register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+       register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
        err = mlxsw_core_driver_register(&mlxsw_sp_driver);
        if (err)
                goto err_core_driver_register;
        return 0;
  
  err_core_driver_register:
+       unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+       unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
        unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
        return err;
  }
  static void __exit mlxsw_sp_module_exit(void)
  {
        mlxsw_core_driver_unregister(&mlxsw_sp_driver);
+       unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
        unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
  }
@@@ -558,9 -558,6 +558,9 @@@ int __mlxsw_sp_port_headroom_set(struc
  int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                  enum mlxsw_reg_qeec_hr hr, u8 index,
                                  u8 next_index, u32 maxrate);
 +int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
 +                                   u16 vid_begin, u16 vid_end,
 +                                   bool learn_enable);
  
  #ifdef CONFIG_MLXSW_SPECTRUM_DCB
  
@@@ -590,6 -587,8 +590,8 @@@ int mlxsw_sp_router_neigh_construct(str
                                    struct neighbour *n);
  void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
                                   struct neighbour *n);
+ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
+                                  unsigned long event, void *ptr);
  
  int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count);
  void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
@@@ -167,8 -167,8 +167,8 @@@ static int mlxsw_sp_port_attr_stp_state
  }
  
  static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
-                                    u16 idx_begin, u16 idx_end, bool set,
-                                    bool only_uc)
+                                    u16 idx_begin, u16 idx_end, bool uc_set,
+                                    bool bm_set)
  {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        u16 local_port = mlxsw_sp_port->local_port;
                return -ENOMEM;
  
        mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin,
-                           table_type, range, local_port, set);
+                           table_type, range, local_port, uc_set);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
        if (err)
                goto buffer_out;
  
-       /* Flooding control allows one to decide whether a given port will
-        * flood unicast traffic for which there is no FDB entry.
-        */
-       if (only_uc)
-               goto buffer_out;
        mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin,
-                           table_type, range, local_port, set);
+                           table_type, range, local_port, bm_set);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
        if (err)
                goto err_flood_bm_set;
-       else
-               goto buffer_out;
+       goto buffer_out;
  
  err_flood_bm_set:
        mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin,
-                           table_type, range, local_port, !set);
+                           table_type, range, local_port, !uc_set);
        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
  buffer_out:
        kfree(sftr_pl);
@@@ -257,44 -251,15 +251,43 @@@ int mlxsw_sp_vport_flood_set(struct mlx
         * the start of the vFIDs range.
         */
        vfid = mlxsw_sp_fid_to_vfid(fid);
-       return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set,
-                                        false);
+       return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, set);
  }
  
 +static int mlxsw_sp_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
 +                                    bool set)
 +{
 +      u16 vid;
 +      int err;
 +
 +      if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
 +              vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
 +
 +              return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid,
 +                                                      set);
 +      }
 +
 +      for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) {
 +              err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid,
 +                                                     set);
 +              if (err)
 +                      goto err_port_vid_learning_set;
 +      }
 +
 +      return 0;
 +
 +err_port_vid_learning_set:
 +      for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
 +              __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, !set);
 +      return err;
 +}
 +
  static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
                                           struct switchdev_trans *trans,
                                           unsigned long brport_flags)
  {
 +      unsigned long learning = mlxsw_sp_port->learning ? BR_LEARNING : 0;
        unsigned long uc_flood = mlxsw_sp_port->uc_flood ? BR_FLOOD : 0;
 -      bool set;
        int err;
  
        if (!mlxsw_sp_port->bridged)
                return 0;
  
        if ((uc_flood ^ brport_flags) & BR_FLOOD) {
 -              set = mlxsw_sp_port->uc_flood ? false : true;
 -              err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, set);
 +              err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port,
 +                                               !mlxsw_sp_port->uc_flood);
                if (err)
                        return err;
        }
  
 +      if ((learning ^ brport_flags) & BR_LEARNING) {
 +              err = mlxsw_sp_port_learning_set(mlxsw_sp_port,
 +                                               !mlxsw_sp_port->learning);
 +              if (err)
 +                      goto err_port_learning_set;
 +      }
 +
        mlxsw_sp_port->uc_flood = brport_flags & BR_FLOOD ? 1 : 0;
        mlxsw_sp_port->learning = brport_flags & BR_LEARNING ? 1 : 0;
        mlxsw_sp_port->learning_sync = brport_flags & BR_LEARNING_SYNC ? 1 : 0;
  
        return 0;
 +
 +err_port_learning_set:
 +      if ((uc_flood ^ brport_flags) & BR_FLOOD)
 +              mlxsw_sp_port_uc_flood_set(mlxsw_sp_port,
 +                                         mlxsw_sp_port->uc_flood);
 +      return err;
  }
  
  static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time)
@@@ -501,6 -453,9 +494,9 @@@ static int __mlxsw_sp_port_fid_join(str
  {
        struct mlxsw_sp_fid *f;
  
+       if (test_bit(fid, mlxsw_sp_port->active_vlans))
+               return 0;
        f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid);
        if (!f) {
                f = mlxsw_sp_fid_create(mlxsw_sp_port->mlxsw_sp, fid);
@@@ -558,7 -513,7 +554,7 @@@ static int mlxsw_sp_port_fid_join(struc
        }
  
        err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end,
-                                       true, false);
+                                       mlxsw_sp_port->uc_flood, true);
        if (err)
                goto err_port_flood_set;
  
@@@ -676,27 -631,6 +672,27 @@@ static int __mlxsw_sp_port_vlans_set(st
        return 0;
  }
  
 +static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
 +                                        u16 vid_begin, u16 vid_end,
 +                                        bool learn_enable)
 +{
 +      u16 vid, vid_e;
 +      int err;
 +
 +      for (vid = vid_begin; vid <= vid_end;
 +           vid += MLXSW_REG_SPVMLR_REC_MAX_COUNT) {
 +              vid_e = min((u16) (vid + MLXSW_REG_SPVMLR_REC_MAX_COUNT - 1),
 +                          vid_end);
 +
 +              err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid,
 +                                                     vid_e, learn_enable);
 +              if (err)
 +                      return err;
 +      }
 +
 +      return 0;
 +}
 +
  static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
                                     u16 vid_begin, u16 vid_end,
                                     bool flag_untagged, bool flag_pvid)
                }
        }
  
 +      err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end,
 +                                           mlxsw_sp_port->learning);
 +      if (err) {
 +              netdev_err(dev, "Failed to set learning for VIDs %d-%d\n",
 +                         vid_begin, vid_end);
 +              goto err_port_vid_learning_set;
 +      }
 +
        /* Changing activity bits only if HW operation succeeded */
        for (vid = vid_begin; vid <= vid_end; vid++) {
                set_bit(vid, mlxsw_sp_port->active_vlans);
  err_port_stp_state_set:
        for (vid = vid_begin; vid <= vid_end; vid++)
                clear_bit(vid, mlxsw_sp_port->active_vlans);
 +      mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end,
 +                                     false);
 +err_port_vid_learning_set:
        if (old_pvid != mlxsw_sp_port->pvid)
                mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid);
  err_port_pvid_set:
@@@ -1074,20 -997,29 +1070,20 @@@ static int mlxsw_sp_port_obj_add(struc
  static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
                                     u16 vid_begin, u16 vid_end)
  {
 -      struct net_device *dev = mlxsw_sp_port->dev;
        u16 vid, pvid;
 -      int err;
  
        if (!mlxsw_sp_port->bridged)
                return -EINVAL;
  
 -      err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end,
 -                                      false, false);
 -      if (err) {
 -              netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin,
 -                         vid_end);
 -              return err;
 -      }
 +      mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end,
 +                                     false);
  
        pvid = mlxsw_sp_port->pvid;
 -      if (pvid >= vid_begin && pvid <= vid_end) {
 -              err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
 -              if (err) {
 -                      netdev_err(dev, "Unable to del PVID %d\n", pvid);
 -                      return err;
 -              }
 -      }
 +      if (pvid >= vid_begin && pvid <= vid_end)
 +              mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
 +
 +      __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false,
 +                                false);
  
        mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end);
  
@@@ -1430,6 -1362,8 +1426,6 @@@ static void mlxsw_sp_fdb_notify_mac_pro
                vid = fid;
        }
  
 -      adding = adding && mlxsw_sp_port->learning;
 -
  do_fdb_op:
        err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid,
                                      adding, true);
@@@ -1491,6 -1425,8 +1487,6 @@@ static void mlxsw_sp_fdb_notify_mac_lag
                vid = fid;
        }
  
 -      adding = adding && mlxsw_sp_port->learning;
 -
  do_fdb_op:
        err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid,
                                          adding, true);
@@@ -1556,18 -1492,20 +1552,18 @@@ static void mlxsw_sp_fdb_notify_work(st
        mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work);
  
        rtnl_lock();
 -      do {
 -              mlxsw_reg_sfn_pack(sfn_pl);
 -              err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
 -              if (err) {
 -                      dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n");
 -                      break;
 -              }
 -              num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl);
 -              for (i = 0; i < num_rec; i++)
 -                      mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
 +      mlxsw_reg_sfn_pack(sfn_pl);
 +      err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
 +      if (err) {
 +              dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n");
 +              goto out;
 +      }
 +      num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl);
 +      for (i = 0; i < num_rec; i++)
 +              mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
  
 -      } while (num_rec);
 +out:
        rtnl_unlock();
 -
        kfree(sfn_pl);
        mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
  }
@@@ -19,6 -19,7 +19,7 @@@
  #include "qed_dcbx.h"
  #include "qed_hsi.h"
  #include "qed_sp.h"
+ #include "qed_sriov.h"
  #ifdef CONFIG_DCB
  #include <linux/qed/qed_eth_if.h>
  #endif
@@@ -874,8 -875,11 +875,8 @@@ int qed_dcbx_info_alloc(struct qed_hwf
        int rc = 0;
  
        p_hwfn->p_dcbx_info = kzalloc(sizeof(*p_hwfn->p_dcbx_info), GFP_KERNEL);
 -      if (!p_hwfn->p_dcbx_info) {
 -              DP_NOTICE(p_hwfn,
 -                        "Failed to allocate 'struct qed_dcbx_info'\n");
 +      if (!p_hwfn->p_dcbx_info)
                rc = -ENOMEM;
 -      }
  
        return rc;
  }
@@@ -942,6 -946,9 +943,9 @@@ static int qed_dcbx_query_params(struc
        struct qed_ptt *p_ptt;
        int rc;
  
+       if (IS_VF(p_hwfn->cdev))
+               return -EINVAL;
        p_ptt = qed_ptt_acquire(p_hwfn);
        if (!p_ptt)
                return -EBUSY;
@@@ -981,6 -988,7 +985,7 @@@ qed_dcbx_set_pfc_data(struct qed_hwfn *
                if (p_params->pfc.prio[i])
                        pfc_map |= BIT(i);
  
+       *pfc &= ~DCBX_PFC_PRI_EN_BITMAP_MASK;
        *pfc |= (pfc_map << DCBX_PFC_PRI_EN_BITMAP_SHIFT);
  
        DP_VERBOSE(p_hwfn, QED_MSG_DCB, "pfc = 0x%x\n", *pfc);
@@@ -1055,24 -1063,33 +1060,33 @@@ qed_dcbx_set_app_data(struct qed_hwfn *
  
        for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
                entry = &p_app->app_pri_tbl[i].entry;
+               *entry = 0;
                if (ieee) {
-                       *entry &= ~DCBX_APP_SF_IEEE_MASK;
+                       *entry &= ~(DCBX_APP_SF_IEEE_MASK | DCBX_APP_SF_MASK);
                        switch (p_params->app_entry[i].sf_ieee) {
                        case QED_DCBX_SF_IEEE_ETHTYPE:
                                *entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE <<
                                           DCBX_APP_SF_IEEE_SHIFT);
+                               *entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
+                                          DCBX_APP_SF_SHIFT);
                                break;
                        case QED_DCBX_SF_IEEE_TCP_PORT:
                                *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT <<
                                           DCBX_APP_SF_IEEE_SHIFT);
+                               *entry |= ((u32)DCBX_APP_SF_PORT <<
+                                          DCBX_APP_SF_SHIFT);
                                break;
                        case QED_DCBX_SF_IEEE_UDP_PORT:
                                *entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT <<
                                           DCBX_APP_SF_IEEE_SHIFT);
+                               *entry |= ((u32)DCBX_APP_SF_PORT <<
+                                          DCBX_APP_SF_SHIFT);
                                break;
                        case QED_DCBX_SF_IEEE_TCP_UDP_PORT:
                                *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT <<
                                           DCBX_APP_SF_IEEE_SHIFT);
+                               *entry |= ((u32)DCBX_APP_SF_PORT <<
+                                          DCBX_APP_SF_SHIFT);
                                break;
                        }
                } else {
@@@ -1172,9 -1189,11 +1186,9 @@@ int qed_dcbx_get_config_params(struct q
                return 0;
        }
  
-       dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL);
+       dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL);
 -      if (!dcbx_info) {
 -              DP_ERR(p_hwfn, "Failed to allocate struct qed_dcbx_info\n");
 +      if (!dcbx_info)
                return -ENOMEM;
 -      }
  
        rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB);
        if (rc) {
@@@ -1207,9 -1226,11 +1221,9 @@@ static struct qed_dcbx_get *qed_dcbnl_g
  {
        struct qed_dcbx_get *dcbx_info;
  
-       dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL);
+       dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL);
 -      if (!dcbx_info) {
 -              DP_ERR(hwfn->cdev, "Failed to allocate memory for dcbx_info\n");
 +      if (!dcbx_info)
                return NULL;
 -      }
  
        if (qed_dcbx_query_params(hwfn, dcbx_info, type)) {
                kfree(dcbx_info);
@@@ -1961,7 -1982,6 +1975,7 @@@ static int qed_dcbnl_get_ieee_pfc(struc
  
        if (!dcbx_info->operational.ieee) {
                DP_INFO(hwfn, "DCBX is not enabled/operational in IEEE mode\n");
 +              kfree(dcbx_info);
                return -EINVAL;
        }
  
@@@ -2130,19 -2150,17 +2144,19 @@@ static int qed_dcbnl_ieee_setets(struc
        return rc;
  }
  
 -int qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets)
 +static int
 +qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets)
  {
        return qed_dcbnl_get_ieee_ets(cdev, ets, true);
  }
  
 -int qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc)
 +static int
 +qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc)
  {
        return qed_dcbnl_get_ieee_pfc(cdev, pfc, true);
  }
  
 -int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app)
 +static int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app)
  {
        struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
        struct qed_dcbx_get *dcbx_info;
        return 0;
  }
  
 -int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
 +static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
  {
        struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
        struct qed_dcbx_get *dcbx_info;
@@@ -222,7 -222,7 +222,7 @@@ int __init qede_init(void
  {
        int ret;
  
 -      pr_notice("qede_init: %s\n", version);
 +      pr_info("qede_init: %s\n", version);
  
        qed_ops = qed_get_eth_ops();
        if (!qed_ops) {
  
  static void __exit qede_cleanup(void)
  {
 -      pr_notice("qede_cleanup called\n");
 +      if (debug & QED_LOG_INFO_MASK)
 +              pr_info("qede_cleanup called\n");
  
        unregister_netdevice_notifier(&qede_netdev_notifier);
        pci_unregister_driver(&qede_pci_driver);
@@@ -271,7 -270,8 +271,7 @@@ module_exit(qede_cleanup)
  
  /* Unmap the data and free skb */
  static int qede_free_tx_pkt(struct qede_dev *edev,
 -                          struct qede_tx_queue *txq,
 -                          int *len)
 +                          struct qede_tx_queue *txq, int *len)
  {
        u16 idx = txq->sw_tx_cons & NUM_TX_BDS_MAX;
        struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
  static void qede_free_failed_tx_pkt(struct qede_dev *edev,
                                    struct qede_tx_queue *txq,
                                    struct eth_tx_1st_bd *first_bd,
 -                                  int nbd,
 -                                  bool data_split)
 +                                  int nbd, bool data_split)
  {
        u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
        struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
  
        /* Return prod to its position before this skb was handled */
        qed_chain_set_prod(&txq->tx_pbl,
 -                         le16_to_cpu(txq->tx_db.data.bd_prod),
 -                         first_bd);
 +                         le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);
  
        first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl);
  
  
        /* Return again prod to its position before this skb was handled */
        qed_chain_set_prod(&txq->tx_pbl,
 -                         le16_to_cpu(txq->tx_db.data.bd_prod),
 -                         first_bd);
 +                         le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);
  
        /* Free skb */
        dev_kfree_skb_any(skb);
  }
  
  static u32 qede_xmit_type(struct qede_dev *edev,
 -                        struct sk_buff *skb,
 -                        int *ipv6_ext)
 +                        struct sk_buff *skb, int *ipv6_ext)
  {
        u32 rc = XMIT_L4_CSUM;
        __be16 l3_proto;
@@@ -430,13 -434,15 +430,13 @@@ static void qede_set_params_for_ipv6_ex
  }
  
  static int map_frag_to_bd(struct qede_dev *edev,
 -                        skb_frag_t *frag,
 -                        struct eth_tx_bd *bd)
 +                        skb_frag_t *frag, struct eth_tx_bd *bd)
  {
        dma_addr_t mapping;
  
        /* Map skb non-linear frag data for DMA */
        mapping = skb_frag_dma_map(&edev->pdev->dev, frag, 0,
 -                                 skb_frag_size(frag),
 -                                 DMA_TO_DEVICE);
 +                                 skb_frag_size(frag), DMA_TO_DEVICE);
        if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
                DP_NOTICE(edev, "Unable to map frag - dropping packet\n");
                return -ENOMEM;
@@@ -498,8 -504,9 +498,8 @@@ static inline void qede_update_tx_produ
  }
  
  /* Main transmit function */
 -static
 -netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 -                          struct net_device *ndev)
 +static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 +                                 struct net_device *ndev)
  {
        struct qede_dev *edev = netdev_priv(ndev);
        struct netdev_queue *netdev_txq;
  
        /* Get tx-queue context and netdev index */
        txq_index = skb_get_queue_mapping(skb);
 -      WARN_ON(txq_index >= QEDE_TSS_CNT(edev));
 +      WARN_ON(txq_index >= QEDE_TSS_COUNT(edev));
        txq = QEDE_TX_QUEUE(edev, txq_index);
        netdev_txq = netdev_get_tx_queue(ndev, txq_index);
  
 -      WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) <
 -                             (MAX_SKB_FRAGS + 1));
 +      WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1));
  
        xmit_type = qede_xmit_type(edev, skb, &ipv6_ext);
  
                            1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
                }
  
 +              /* Legacy FW had flipped behavior with regard to this bit -
 +               * i.e., it needed to be set to prevent the FW from touching
 +               * encapsulated packets when it didn't need to.
 +               */
 +              if (unlikely(txq->is_legacy))
 +                      first_bd->data.bitfields ^=
 +                          1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
 +
                /* If the packet is IPv6 with extension header, indicate that
                 * to FW and pass a few params, since the device cracker doesn't
                 * support parsing IPv6 with extension header/s.
                        qede_update_tx_producer(txq);
  
                netif_tx_stop_queue(netdev_txq);
 +              txq->stopped_cnt++;
                DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED,
                           "Stop queue was called\n");
                /* paired memory barrier is in qede_tx_int(), we have to keep
@@@ -765,7 -764,8 +765,7 @@@ int qede_txq_has_work(struct qede_tx_qu
        return hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl);
  }
  
 -static int qede_tx_int(struct qede_dev *edev,
 -                     struct qede_tx_queue *txq)
 +static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
  {
        struct netdev_queue *netdev_txq;
        u16 hw_bd_cons;
                bytes_compl += len;
                pkts_compl++;
                txq->sw_tx_cons++;
 +              txq->xmit_pkts++;
        }
  
        netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
@@@ -964,7 -963,8 +964,7 @@@ static inline void qede_update_rx_prod(
  
  static u32 qede_get_rxhash(struct qede_dev *edev,
                           u8 bitfields,
 -                         __le32 rss_hash,
 -                         enum pkt_hash_types *rxhash_type)
 +                         __le32 rss_hash, enum pkt_hash_types *rxhash_type)
  {
        enum rss_hash_type htype;
  
@@@ -993,10 -993,12 +993,10 @@@ static void qede_set_skb_csum(struct sk
  
  static inline void qede_skb_receive(struct qede_dev *edev,
                                    struct qede_fastpath *fp,
 -                                  struct sk_buff *skb,
 -                                  u16 vlan_tag)
 +                                  struct sk_buff *skb, u16 vlan_tag)
  {
        if (vlan_tag)
 -              __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
 -                                     vlan_tag);
 +              __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
  
        napi_gro_receive(&fp->napi, skb);
  }
@@@ -1019,7 -1021,8 +1019,7 @@@ static void qede_set_gro_params(struct 
  
  static int qede_fill_frag_skb(struct qede_dev *edev,
                              struct qede_rx_queue *rxq,
 -                            u8 tpa_agg_index,
 -                            u16 len_on_bd)
 +                            u8 tpa_agg_index, u16 len_on_bd)
  {
        struct sw_rx_data *current_bd = &rxq->sw_rx_ring[rxq->sw_rx_cons &
                                                         NUM_RX_BDS_MAX];
@@@ -1206,7 -1209,7 +1206,7 @@@ static void qede_gro_receive(struct qed
  #endif
  
  send_skb:
 -      skb_record_rx_queue(skb, fp->rss_id);
 +      skb_record_rx_queue(skb, fp->rxq->rxq_id);
        qede_skb_receive(edev, fp, skb, vlan_tag);
  }
  
@@@ -1410,7 -1413,7 +1410,7 @@@ static int qede_rx_int(struct qede_fast
  
                if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) {
                        edev->ops->eth_cqe_completion(
 -                                      edev->cdev, fp->rss_id,
 +                                      edev->cdev, fp->id,
                                        (struct eth_slow_path_rx_cqe *)cqe);
                        goto next_cqe;
                }
@@@ -1467,7 -1470,7 +1467,7 @@@ alloc_skb
                skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
                if (unlikely(!skb)) {
                        DP_NOTICE(edev,
 -                                "Build_skb failed, dropping incoming packet\n");
 +                                "skb allocation failed, dropping incoming packet\n");
                        qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num);
                        rxq->rx_alloc_errors++;
                        goto next_cqe;
                skb->protocol = eth_type_trans(skb, edev->ndev);
  
                rx_hash = qede_get_rxhash(edev, fp_cqe->bitfields,
 -                                        fp_cqe->rss_hash,
 -                                        &rxhash_type);
 +                                        fp_cqe->rss_hash, &rxhash_type);
  
                skb_set_hash(skb, rx_hash, rxhash_type);
  
                qede_set_skb_csum(skb, csum_flag);
  
 -              skb_record_rx_queue(skb, fp->rss_id);
 +              skb_record_rx_queue(skb, fp->rxq->rxq_id);
  
                qede_skb_receive(edev, fp, skb, le16_to_cpu(fp_cqe->vlan_tag));
  next_rx_only:
@@@ -1600,8 -1604,6 +1600,8 @@@ next_cqe: /* don't consume bd rx buffe
        /* Update producers */
        qede_update_rx_prod(edev, rxq);
  
 +      rxq->rcv_pkts += rx_pkt;
 +
        return rx_pkt;
  }
  
@@@ -1614,12 -1616,10 +1614,12 @@@ static int qede_poll(struct napi_struc
        u8 tc;
  
        for (tc = 0; tc < edev->num_tc; tc++)
 -              if (qede_txq_has_work(&fp->txqs[tc]))
 +              if (likely(fp->type & QEDE_FASTPATH_TX) &&
 +                  qede_txq_has_work(&fp->txqs[tc]))
                        qede_tx_int(edev, &fp->txqs[tc]);
  
 -      rx_work_done = qede_has_rx_work(fp->rxq) ?
 +      rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
 +                      qede_has_rx_work(fp->rxq)) ?
                        qede_rx_int(fp, budget) : 0;
        if (rx_work_done < budget) {
                qed_sb_update_sb_idx(fp->sb_info);
                rmb();
  
                /* Fall out from the NAPI loop if needed */
 -              if (!(qede_has_rx_work(fp->rxq) ||
 -                    qede_has_tx_work(fp))) {
 +              if (!((likely(fp->type & QEDE_FASTPATH_RX) &&
 +                     qede_has_rx_work(fp->rxq)) ||
 +                    (likely(fp->type & QEDE_FASTPATH_TX) &&
 +                     qede_has_tx_work(fp)))) {
                        napi_complete(napi);
  
                        /* Update and reenable interrupts */
@@@ -1713,8 -1711,6 +1713,8 @@@ void qede_fill_by_demand_stats(struct q
  
        edev->ops->get_vport_stats(edev->cdev, &stats);
        edev->stats.no_buff_discards = stats.no_buff_discards;
 +      edev->stats.packet_too_big_discard = stats.packet_too_big_discard;
 +      edev->stats.ttl0_discard = stats.ttl0_discard;
        edev->stats.rx_ucast_bytes = stats.rx_ucast_bytes;
        edev->stats.rx_mcast_bytes = stats.rx_mcast_bytes;
        edev->stats.rx_bcast_bytes = stats.rx_bcast_bytes;
        edev->stats.tx_mac_ctrl_frames = stats.tx_mac_ctrl_frames;
  }
  
 -static struct rtnl_link_stats64 *qede_get_stats64(
 -                          struct net_device *dev,
 -                          struct rtnl_link_stats64 *stats)
 +static
 +struct rtnl_link_stats64 *qede_get_stats64(struct net_device *dev,
 +                                         struct rtnl_link_stats64 *stats)
  {
        struct qede_dev *edev = netdev_priv(dev);
  
@@@ -2110,13 -2106,14 +2110,13 @@@ static void qede_vlan_mark_nonconfigure
                }
  
                DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
 -                         "marked vlan %d as non-configured\n",
 -                         vlan->vid);
 +                         "marked vlan %d as non-configured\n", vlan->vid);
        }
  
        edev->accept_any_vlan = false;
  }
  
 -int qede_set_features(struct net_device *dev, netdev_features_t features)
 +static int qede_set_features(struct net_device *dev, netdev_features_t features)
  {
        struct qede_dev *edev = netdev_priv(dev);
        netdev_features_t changes = features ^ dev->features;
@@@ -2152,7 -2149,7 +2152,7 @@@ static void qede_udp_tunnel_add(struct 
  
                edev->vxlan_dst_port = t_port;
  
 -              DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d",
 +              DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d\n",
                           t_port);
  
                set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags);
  
                edev->geneve_dst_port = t_port;
  
 -              DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d",
 +              DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d\n",
                           t_port);
                set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags);
                break;
@@@ -2187,7 -2184,7 +2187,7 @@@ static void qede_udp_tunnel_del(struct 
  
                edev->vxlan_dst_port = 0;
  
 -              DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d",
 +              DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d\n",
                           t_port);
  
                set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags);
  
                edev->geneve_dst_port = 0;
  
 -              DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d",
 +              DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d\n",
                           t_port);
                set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags);
                break;
@@@ -2243,13 -2240,15 +2243,13 @@@ static const struct net_device_ops qede
  static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
                                            struct pci_dev *pdev,
                                            struct qed_dev_eth_info *info,
 -                                          u32 dp_module,
 -                                          u8 dp_level)
 +                                          u32 dp_module, u8 dp_level)
  {
        struct net_device *ndev;
        struct qede_dev *edev;
  
        ndev = alloc_etherdev_mqs(sizeof(*edev),
 -                                info->num_queues,
 -                                info->num_queues);
 +                                info->num_queues, info->num_queues);
        if (!ndev) {
                pr_err("etherdev allocation failed\n");
                return NULL;
        edev->q_num_rx_buffers = NUM_RX_BDS_DEF;
        edev->q_num_tx_buffers = NUM_TX_BDS_DEF;
  
 +      DP_INFO(edev, "Allocated netdev with %d tx queues and %d rx queues\n",
 +              info->num_queues, info->num_queues);
 +
        SET_NETDEV_DEV(ndev, &pdev->dev);
  
        memset(&edev->stats, 0, sizeof(edev->stats));
@@@ -2356,7 -2352,7 +2356,7 @@@ static void qede_free_fp_array(struct q
                struct qede_fastpath *fp;
                int i;
  
 -              for_each_rss(i) {
 +              for_each_queue(i) {
                        fp = &edev->fp_array[i];
  
                        kfree(fp->sb_info);
                }
                kfree(edev->fp_array);
        }
 -      edev->num_rss = 0;
 +
 +      edev->num_queues = 0;
 +      edev->fp_num_tx = 0;
 +      edev->fp_num_rx = 0;
  }
  
  static int qede_alloc_fp_array(struct qede_dev *edev)
  {
 +      u8 fp_combined, fp_rx = edev->fp_num_rx;
        struct qede_fastpath *fp;
        int i;
  
 -      edev->fp_array = kcalloc(QEDE_RSS_CNT(edev),
 +      edev->fp_array = kcalloc(QEDE_QUEUE_CNT(edev),
                                 sizeof(*edev->fp_array), GFP_KERNEL);
        if (!edev->fp_array) {
                DP_NOTICE(edev, "fp array allocation failed\n");
                goto err;
        }
  
 -      for_each_rss(i) {
 +      fp_combined = QEDE_QUEUE_CNT(edev) - fp_rx - edev->fp_num_tx;
 +
 +      /* Allocate the FP elements for Rx queues followed by combined and then
 +       * the Tx. This ordering should be maintained so that the respective
 +       * queues (Rx or Tx) will be together in the fastpath array and the
 +       * associated ids will be sequential.
 +       */
 +      for_each_queue(i) {
                fp = &edev->fp_array[i];
  
                fp->sb_info = kcalloc(1, sizeof(*fp->sb_info), GFP_KERNEL);
                        goto err;
                }
  
 -              fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL);
 -              if (!fp->rxq) {
 -                      DP_NOTICE(edev, "RXQ struct allocation failed\n");
 -                      goto err;
 +              if (fp_rx) {
 +                      fp->type = QEDE_FASTPATH_RX;
 +                      fp_rx--;
 +              } else if (fp_combined) {
 +                      fp->type = QEDE_FASTPATH_COMBINED;
 +                      fp_combined--;
 +              } else {
 +                      fp->type = QEDE_FASTPATH_TX;
                }
  
 -              fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs), GFP_KERNEL);
 -              if (!fp->txqs) {
 -                      DP_NOTICE(edev, "TXQ array allocation failed\n");
 -                      goto err;
 +              if (fp->type & QEDE_FASTPATH_TX) {
 +                      fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs),
 +                                         GFP_KERNEL);
 +                      if (!fp->txqs) {
 +                              DP_NOTICE(edev,
 +                                        "TXQ array allocation failed\n");
 +                              goto err;
 +                      }
 +              }
 +
 +              if (fp->type & QEDE_FASTPATH_RX) {
 +                      fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL);
 +                      if (!fp->rxq) {
 +                              DP_NOTICE(edev,
 +                                        "RXQ struct allocation failed\n");
 +                              goto err;
 +                      }
                }
        }
  
@@@ -2488,7 -2456,7 +2488,7 @@@ static int __qede_probe(struct pci_dev 
                        bool is_vf, enum qede_probe_mode mode)
  {
        struct qed_probe_params probe_params;
 -      struct qed_slowpath_params params;
 +      struct qed_slowpath_params sp_params;
        struct qed_dev_eth_info dev_info;
        struct qede_dev *edev;
        struct qed_dev *cdev;
        qede_update_pf_params(cdev);
  
        /* Start the Slowpath-process */
 -      memset(&params, 0, sizeof(struct qed_slowpath_params));
 -      params.int_mode = QED_INT_MODE_MSIX;
 -      params.drv_major = QEDE_MAJOR_VERSION;
 -      params.drv_minor = QEDE_MINOR_VERSION;
 -      params.drv_rev = QEDE_REVISION_VERSION;
 -      params.drv_eng = QEDE_ENGINEERING_VERSION;
 -      strlcpy(params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
 -      rc = qed_ops->common->slowpath_start(cdev, &params);
 +      memset(&sp_params, 0, sizeof(sp_params));
 +      sp_params.int_mode = QED_INT_MODE_MSIX;
 +      sp_params.drv_major = QEDE_MAJOR_VERSION;
 +      sp_params.drv_minor = QEDE_MINOR_VERSION;
 +      sp_params.drv_rev = QEDE_REVISION_VERSION;
 +      sp_params.drv_eng = QEDE_ENGINEERING_VERSION;
 +      strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
 +      rc = qed_ops->common->slowpath_start(cdev, &sp_params);
        if (rc) {
                pr_notice("Cannot start slowpath\n");
                goto err1;
        edev->ops->register_ops(cdev, &qede_ll_ops, edev);
  
  #ifdef CONFIG_DCB
-       qede_set_dcbnl_ops(edev->ndev);
+       if (!IS_VF(edev))
+               qede_set_dcbnl_ops(edev->ndev);
  #endif
  
        INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
@@@ -2621,7 -2590,7 +2622,7 @@@ static void __qede_remove(struct pci_de
        qed_ops->common->slowpath_stop(cdev);
        qed_ops->common->remove(cdev);
  
 -      pr_notice("Ending successfully qede_remove\n");
 +      dev_info(&pdev->dev, "Ending qede_remove successfully\n");
  }
  
  static void qede_remove(struct pci_dev *pdev)
@@@ -2640,8 -2609,8 +2641,8 @@@ static int qede_set_num_queues(struct q
        u16 rss_num;
  
        /* Setup queues according to possible resources*/
 -      if (edev->req_rss)
 -              rss_num = edev->req_rss;
 +      if (edev->req_queues)
 +              rss_num = edev->req_queues;
        else
                rss_num = netif_get_num_default_rss_queues() *
                          edev->dev_info.common.num_hwfns;
        rc = edev->ops->common->set_fp_int(edev->cdev, rss_num);
        if (rc > 0) {
                /* Managed to request interrupts for our queues */
 -              edev->num_rss = rc;
 +              edev->num_queues = rc;
                DP_INFO(edev, "Managed %d [of %d] RSS queues\n",
 -                      QEDE_RSS_CNT(edev), rss_num);
 +                      QEDE_QUEUE_CNT(edev), rss_num);
                rc = 0;
        }
 +
 +      edev->fp_num_tx = edev->req_num_tx;
 +      edev->fp_num_rx = edev->req_num_rx;
 +
        return rc;
  }
  
@@@ -2673,14 -2638,16 +2674,14 @@@ static void qede_free_mem_sb(struct qed
  
  /* This function allocates fast-path status block memory */
  static int qede_alloc_mem_sb(struct qede_dev *edev,
 -                           struct qed_sb_info *sb_info,
 -                           u16 sb_id)
 +                           struct qed_sb_info *sb_info, u16 sb_id)
  {
        struct status_block *sb_virt;
        dma_addr_t sb_phys;
        int rc;
  
        sb_virt = dma_alloc_coherent(&edev->pdev->dev,
 -                                   sizeof(*sb_virt),
 -                                   &sb_phys, GFP_KERNEL);
 +                                   sizeof(*sb_virt), &sb_phys, GFP_KERNEL);
        if (!sb_virt) {
                DP_ERR(edev, "Status block allocation failed\n");
                return -ENOMEM;
@@@ -2712,15 -2679,16 +2713,15 @@@ static void qede_free_rx_buffers(struc
                data = rx_buf->data;
  
                dma_unmap_page(&edev->pdev->dev,
 -                             rx_buf->mapping,
 -                             PAGE_SIZE, DMA_FROM_DEVICE);
 +                             rx_buf->mapping, PAGE_SIZE, DMA_FROM_DEVICE);
  
                rx_buf->data = NULL;
                __free_page(data);
        }
  }
  
 -static void qede_free_sge_mem(struct qede_dev *edev,
 -                            struct qede_rx_queue *rxq) {
 +static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 +{
        int i;
  
        if (edev->gro_disable)
        }
  }
  
 -static void qede_free_mem_rxq(struct qede_dev *edev,
 -                            struct qede_rx_queue *rxq)
 +static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
  {
        qede_free_sge_mem(edev, rxq);
  
@@@ -2761,6 -2730,9 +2762,6 @@@ static int qede_alloc_rx_buffer(struct 
        struct eth_rx_bd *rx_bd;
        dma_addr_t mapping;
        struct page *data;
 -      u16 rx_buf_size;
 -
 -      rx_buf_size = rxq->rx_buf_size;
  
        data = alloc_pages(GFP_ATOMIC, 0);
        if (unlikely(!data)) {
        return 0;
  }
  
 -static int qede_alloc_sge_mem(struct qede_dev *edev,
 -                            struct qede_rx_queue *rxq)
 +static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
  {
        dma_addr_t mapping;
        int i;
  }
  
  /* This function allocates all memory needed per Rx queue */
 -static int qede_alloc_mem_rxq(struct qede_dev *edev,
 -                            struct qede_rx_queue *rxq)
 +static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
  {
        int i, rc, size;
  
        rxq->num_rx_buffers = edev->q_num_rx_buffers;
  
 -      rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD +
 -                         edev->ndev->mtu;
 +      rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu;
 +
        if (rxq->rx_buf_size > PAGE_SIZE)
                rxq->rx_buf_size = PAGE_SIZE;
  
@@@ -2903,7 -2877,8 +2904,7 @@@ err
        return rc;
  }
  
 -static void qede_free_mem_txq(struct qede_dev *edev,
 -                            struct qede_tx_queue *txq)
 +static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
  {
        /* Free the parallel SW ring */
        kfree(txq->sw_tx_ring);
  }
  
  /* This function allocates all memory needed per Tx queue */
 -static int qede_alloc_mem_txq(struct qede_dev *edev,
 -                            struct qede_tx_queue *txq)
 +static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
  {
        int size, rc;
        union eth_tx_bd_types *p_virt;
  }
  
  /* This function frees all memory of a single fp */
 -static void qede_free_mem_fp(struct qede_dev *edev,
 -                           struct qede_fastpath *fp)
 +static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
  {
        int tc;
  
        qede_free_mem_sb(edev, fp->sb_info);
  
 -      qede_free_mem_rxq(edev, fp->rxq);
 +      if (fp->type & QEDE_FASTPATH_RX)
 +              qede_free_mem_rxq(edev, fp->rxq);
  
 -      for (tc = 0; tc < edev->num_tc; tc++)
 -              qede_free_mem_txq(edev, &fp->txqs[tc]);
 +      if (fp->type & QEDE_FASTPATH_TX)
 +              for (tc = 0; tc < edev->num_tc; tc++)
 +                      qede_free_mem_txq(edev, &fp->txqs[tc]);
  }
  
  /* This function allocates all memory needed for a single fp (i.e. an entity
 - * which contains status block, one rx queue and multiple per-TC tx queues.
 + * which contains status block, one rx queue and/or multiple per-TC tx queues.
   */
 -static int qede_alloc_mem_fp(struct qede_dev *edev,
 -                           struct qede_fastpath *fp)
 +static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
  {
        int rc, tc;
  
 -      rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->rss_id);
 -      if (rc)
 -              goto err;
 -
 -      rc = qede_alloc_mem_rxq(edev, fp->rxq);
 +      rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->id);
        if (rc)
                goto err;
  
 -      for (tc = 0; tc < edev->num_tc; tc++) {
 -              rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]);
 +      if (fp->type & QEDE_FASTPATH_RX) {
 +              rc = qede_alloc_mem_rxq(edev, fp->rxq);
                if (rc)
                        goto err;
        }
  
 +      if (fp->type & QEDE_FASTPATH_TX) {
 +              for (tc = 0; tc < edev->num_tc; tc++) {
 +                      rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]);
 +                      if (rc)
 +                              goto err;
 +              }
 +      }
 +
        return 0;
  err:
        return rc;
@@@ -2993,7 -2965,7 +2994,7 @@@ static void qede_free_mem_load(struct q
  {
        int i;
  
 -      for_each_rss(i) {
 +      for_each_queue(i) {
                struct qede_fastpath *fp = &edev->fp_array[i];
  
                qede_free_mem_fp(edev, fp);
  /* This function allocates all qede memory at NIC load. */
  static int qede_alloc_mem_load(struct qede_dev *edev)
  {
 -      int rc = 0, rss_id;
 +      int rc = 0, queue_id;
  
 -      for (rss_id = 0; rss_id < QEDE_RSS_CNT(edev); rss_id++) {
 -              struct qede_fastpath *fp = &edev->fp_array[rss_id];
 +      for (queue_id = 0; queue_id < QEDE_QUEUE_CNT(edev); queue_id++) {
 +              struct qede_fastpath *fp = &edev->fp_array[queue_id];
  
                rc = qede_alloc_mem_fp(edev, fp);
                if (rc) {
                        DP_ERR(edev,
                               "Failed to allocate memory for fastpath - rss id = %d\n",
 -                             rss_id);
 +                             queue_id);
                        qede_free_mem_load(edev);
                        return rc;
                }
  /* This function inits fp content and resets the SB, RXQ and TXQ structures */
  static void qede_init_fp(struct qede_dev *edev)
  {
 -      int rss_id, txq_index, tc;
 +      int queue_id, rxq_index = 0, txq_index = 0, tc;
        struct qede_fastpath *fp;
  
 -      for_each_rss(rss_id) {
 -              fp = &edev->fp_array[rss_id];
 +      for_each_queue(queue_id) {
 +              fp = &edev->fp_array[queue_id];
  
                fp->edev = edev;
 -              fp->rss_id = rss_id;
 +              fp->id = queue_id;
  
                memset((void *)&fp->napi, 0, sizeof(fp->napi));
  
                memset((void *)fp->sb_info, 0, sizeof(*fp->sb_info));
  
 -              memset((void *)fp->rxq, 0, sizeof(*fp->rxq));
 -              fp->rxq->rxq_id = rss_id;
 +              if (fp->type & QEDE_FASTPATH_RX) {
 +                      memset((void *)fp->rxq, 0, sizeof(*fp->rxq));
 +                      fp->rxq->rxq_id = rxq_index++;
 +              }
  
 -              memset((void *)fp->txqs, 0, (edev->num_tc * sizeof(*fp->txqs)));
 -              for (tc = 0; tc < edev->num_tc; tc++) {
 -                      txq_index = tc * QEDE_RSS_CNT(edev) + rss_id;
 -                      fp->txqs[tc].index = txq_index;
 +              if (fp->type & QEDE_FASTPATH_TX) {
 +                      memset((void *)fp->txqs, 0,
 +                             (edev->num_tc * sizeof(*fp->txqs)));
 +                      for (tc = 0; tc < edev->num_tc; tc++) {
 +                              fp->txqs[tc].index = txq_index +
 +                                  tc * QEDE_TSS_COUNT(edev);
 +                              if (edev->dev_info.is_legacy)
 +                                      fp->txqs[tc].is_legacy = true;
 +                      }
 +                      txq_index++;
                }
  
                snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
 -                       edev->ndev->name, rss_id);
 +                       edev->ndev->name, queue_id);
        }
  
        edev->gro_disable = !(edev->ndev->features & NETIF_F_GRO);
@@@ -3065,13 -3029,12 +3066,13 @@@ static int qede_set_real_num_queues(str
  {
        int rc = 0;
  
 -      rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_CNT(edev));
 +      rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_COUNT(edev));
        if (rc) {
                DP_NOTICE(edev, "Failed to set real number of Tx queues\n");
                return rc;
        }
 -      rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_CNT(edev));
 +
 +      rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_COUNT(edev));
        if (rc) {
                DP_NOTICE(edev, "Failed to set real number of Rx queues\n");
                return rc;
@@@ -3084,7 -3047,7 +3085,7 @@@ static void qede_napi_disable_remove(st
  {
        int i;
  
 -      for_each_rss(i) {
 +      for_each_queue(i) {
                napi_disable(&edev->fp_array[i].napi);
  
                netif_napi_del(&edev->fp_array[i].napi);
@@@ -3096,7 -3059,7 +3097,7 @@@ static void qede_napi_add_enable(struc
        int i;
  
        /* Add NAPI objects */
 -      for_each_rss(i) {
 +      for_each_queue(i) {
                netif_napi_add(edev->ndev, &edev->fp_array[i].napi,
                               qede_poll, NAPI_POLL_WEIGHT);
                napi_enable(&edev->fp_array[i].napi);
@@@ -3125,14 -3088,14 +3126,14 @@@ static int qede_req_msix_irqs(struct qe
        int i, rc;
  
        /* Sanitize number of interrupts == number of prepared RSS queues */
 -      if (QEDE_RSS_CNT(edev) > edev->int_info.msix_cnt) {
 +      if (QEDE_QUEUE_CNT(edev) > edev->int_info.msix_cnt) {
                DP_ERR(edev,
                       "Interrupt mismatch: %d RSS queues > %d MSI-x vectors\n",
 -                     QEDE_RSS_CNT(edev), edev->int_info.msix_cnt);
 +                     QEDE_QUEUE_CNT(edev), edev->int_info.msix_cnt);
                return -EINVAL;
        }
  
 -      for (i = 0; i < QEDE_RSS_CNT(edev); i++) {
 +      for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) {
                rc = request_irq(edev->int_info.msix[i].vector,
                                 qede_msix_fp_int, 0, edev->fp_array[i].name,
                                 &edev->fp_array[i]);
@@@ -3177,17 -3140,18 +3178,17 @@@ static int qede_setup_irqs(struct qede_
  
                /* qed should learn to receive the RSS ids and callbacks */
                ops = edev->ops->common;
 -              for (i = 0; i < QEDE_RSS_CNT(edev); i++)
 +              for (i = 0; i < QEDE_QUEUE_CNT(edev); i++)
                        ops->simd_handler_config(edev->cdev,
                                                 &edev->fp_array[i], i,
                                                 qede_simd_fp_handler);
 -              edev->int_info.used_cnt = QEDE_RSS_CNT(edev);
 +              edev->int_info.used_cnt = QEDE_QUEUE_CNT(edev);
        }
        return 0;
  }
  
  static int qede_drain_txq(struct qede_dev *edev,
 -                        struct qede_tx_queue *txq,
 -                        bool allow_drain)
 +                        struct qede_tx_queue *txq, bool allow_drain)
  {
        int rc, cnt = 1000;
  
@@@ -3239,53 -3203,45 +3240,53 @@@ static int qede_stop_queues(struct qede
        }
  
        /* Flush Tx queues. If needed, request drain from MCP */
 -      for_each_rss(i) {
 +      for_each_queue(i) {
                struct qede_fastpath *fp = &edev->fp_array[i];
  
 -              for (tc = 0; tc < edev->num_tc; tc++) {
 -                      struct qede_tx_queue *txq = &fp->txqs[tc];
 +              if (fp->type & QEDE_FASTPATH_TX) {
 +                      for (tc = 0; tc < edev->num_tc; tc++) {
 +                              struct qede_tx_queue *txq = &fp->txqs[tc];
  
 -                      rc = qede_drain_txq(edev, txq, true);
 -                      if (rc)
 -                              return rc;
 +                              rc = qede_drain_txq(edev, txq, true);
 +                              if (rc)
 +                                      return rc;
 +                      }
                }
        }
  
 -      /* Stop all Queues in reverse order*/
 -      for (i = QEDE_RSS_CNT(edev) - 1; i >= 0; i--) {
 +      /* Stop all Queues in reverse order */
 +      for (i = QEDE_QUEUE_CNT(edev) - 1; i >= 0; i--) {
                struct qed_stop_rxq_params rx_params;
  
 -              /* Stop the Tx Queue(s)*/
 -              for (tc = 0; tc < edev->num_tc; tc++) {
 -                      struct qed_stop_txq_params tx_params;
 -
 -                      tx_params.rss_id = i;
 -                      tx_params.tx_queue_id = tc * QEDE_RSS_CNT(edev) + i;
 -                      rc = edev->ops->q_tx_stop(cdev, &tx_params);
 -                      if (rc) {
 -                              DP_ERR(edev, "Failed to stop TXQ #%d\n",
 -                                     tx_params.tx_queue_id);
 -                              return rc;
 +              /* Stop the Tx Queue(s) */
 +              if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
 +                      for (tc = 0; tc < edev->num_tc; tc++) {
 +                              struct qed_stop_txq_params tx_params;
 +                              u8 val;
 +
 +                              tx_params.rss_id = i;
 +                              val = edev->fp_array[i].txqs[tc].index;
 +                              tx_params.tx_queue_id = val;
 +                              rc = edev->ops->q_tx_stop(cdev, &tx_params);
 +                              if (rc) {
 +                                      DP_ERR(edev, "Failed to stop TXQ #%d\n",
 +                                             tx_params.tx_queue_id);
 +                                      return rc;
 +                              }
                        }
                }
  
 -              /* Stop the Rx Queue*/
 -              memset(&rx_params, 0, sizeof(rx_params));
 -              rx_params.rss_id = i;
 -              rx_params.rx_queue_id = i;
 +              /* Stop the Rx Queue */
 +              if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
 +                      memset(&rx_params, 0, sizeof(rx_params));
 +                      rx_params.rss_id = i;
 +                      rx_params.rx_queue_id = edev->fp_array[i].rxq->rxq_id;
  
 -              rc = edev->ops->q_rx_stop(cdev, &rx_params);
 -              if (rc) {
 -                      DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
 -                      return rc;
 +                      rc = edev->ops->q_rx_stop(cdev, &rx_params);
 +                      if (rc) {
 +                              DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
 +                              return rc;
 +                      }
                }
        }
  
@@@ -3308,7 -3264,7 +3309,7 @@@ static int qede_start_queues(struct qed
        struct qed_start_vport_params start = {0};
        bool reset_rss_indir = false;
  
 -      if (!edev->num_rss) {
 +      if (!edev->num_queues) {
                DP_ERR(edev,
                       "Cannot update V-VPORT as active as there are no Rx queues\n");
                return -EINVAL;
                   "Start vport ramrod passed, vport_id = %d, MTU = %d, vlan_removal_en = %d\n",
                   start.vport_id, edev->ndev->mtu + 0xe, vlan_removal_en);
  
 -      for_each_rss(i) {
 +      for_each_queue(i) {
                struct qede_fastpath *fp = &edev->fp_array[i];
 -              dma_addr_t phys_table = fp->rxq->rx_comp_ring.pbl.p_phys_table;
 -
 -              memset(&q_params, 0, sizeof(q_params));
 -              q_params.rss_id = i;
 -              q_params.queue_id = i;
 -              q_params.vport_id = 0;
 -              q_params.sb = fp->sb_info->igu_sb_id;
 -              q_params.sb_idx = RX_PI;
 -
 -              rc = edev->ops->q_rx_start(cdev, &q_params,
 -                                         fp->rxq->rx_buf_size,
 -                                         fp->rxq->rx_bd_ring.p_phys_addr,
 -                                         phys_table,
 -                                         fp->rxq->rx_comp_ring.page_cnt,
 -                                         &fp->rxq->hw_rxq_prod_addr);
 -              if (rc) {
 -                      DP_ERR(edev, "Start RXQ #%d failed %d\n", i, rc);
 -                      return rc;
 -              }
 +              dma_addr_t p_phys_table;
 +              u32 page_cnt;
 +
 +              if (fp->type & QEDE_FASTPATH_RX) {
 +                      struct qede_rx_queue *rxq = fp->rxq;
 +                      __le16 *val;
 +
 +                      memset(&q_params, 0, sizeof(q_params));
 +                      q_params.rss_id = i;
 +                      q_params.queue_id = rxq->rxq_id;
 +                      q_params.vport_id = 0;
 +                      q_params.sb = fp->sb_info->igu_sb_id;
 +                      q_params.sb_idx = RX_PI;
 +
 +                      p_phys_table =
 +                          qed_chain_get_pbl_phys(&rxq->rx_comp_ring);
 +                      page_cnt = qed_chain_get_page_cnt(&rxq->rx_comp_ring);
 +
 +                      rc = edev->ops->q_rx_start(cdev, &q_params,
 +                                                 rxq->rx_buf_size,
 +                                                 rxq->rx_bd_ring.p_phys_addr,
 +                                                 p_phys_table,
 +                                                 page_cnt,
 +                                                 &rxq->hw_rxq_prod_addr);
 +                      if (rc) {
 +                              DP_ERR(edev, "Start RXQ #%d failed %d\n", i,
 +                                     rc);
 +                              return rc;
 +                      }
  
 -              fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI];
 +                      val = &fp->sb_info->sb_virt->pi_array[RX_PI];
 +                      rxq->hw_cons_ptr = val;
  
 -              qede_update_rx_prod(edev, fp->rxq);
 +                      qede_update_rx_prod(edev, rxq);
 +              }
 +
 +              if (!(fp->type & QEDE_FASTPATH_TX))
 +                      continue;
  
                for (tc = 0; tc < edev->num_tc; tc++) {
                        struct qede_tx_queue *txq = &fp->txqs[tc];
 -                      int txq_index = tc * QEDE_RSS_CNT(edev) + i;
 +
 +                      p_phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
 +                      page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
  
                        memset(&q_params, 0, sizeof(q_params));
                        q_params.rss_id = i;
 -                      q_params.queue_id = txq_index;
 +                      q_params.queue_id = txq->index;
                        q_params.vport_id = 0;
                        q_params.sb = fp->sb_info->igu_sb_id;
                        q_params.sb_idx = TX_PI(tc);
  
                        rc = edev->ops->q_tx_start(cdev, &q_params,
 -                                                 txq->tx_pbl.pbl.p_phys_table,
 -                                                 txq->tx_pbl.page_cnt,
 +                                                 p_phys_table, page_cnt,
                                                   &txq->doorbell_addr);
                        if (rc) {
                                DP_ERR(edev, "Start TXQ #%d failed %d\n",
 -                                     txq_index, rc);
 +                                     txq->index, rc);
                                return rc;
                        }
  
        }
  
        /* Fill struct with RSS params */
 -      if (QEDE_RSS_CNT(edev) > 1) {
 +      if (QEDE_RSS_COUNT(edev) > 1) {
                vport_update_params.update_rss_flg = 1;
  
                /* Need to validate current RSS config uses valid entries */
                for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) {
                        if (edev->rss_params.rss_ind_table[i] >=
 -                          edev->num_rss) {
 +                          QEDE_RSS_COUNT(edev)) {
                                reset_rss_indir = true;
                                break;
                        }
                        for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) {
                                u16 indir_val;
  
 -                              val = QEDE_RSS_CNT(edev);
 +                              val = QEDE_RSS_COUNT(edev);
                                indir_val = ethtool_rxfh_indir_default(i, val);
                                edev->rss_params.rss_ind_table[i] = indir_val;
                        }
@@@ -3570,7 -3510,7 +3571,7 @@@ static int qede_load(struct qede_dev *e
        if (rc)
                goto err1;
        DP_INFO(edev, "Allocated %d RSS queues on %d TC/s\n",
 -              QEDE_RSS_CNT(edev), edev->num_tc);
 +              QEDE_QUEUE_CNT(edev), edev->num_tc);
  
        rc = qede_set_real_num_queues(edev);
        if (rc)
@@@ -3623,9 -3563,7 +3624,9 @@@ err2
  err1:
        edev->ops->common->set_fp_int(edev->cdev, 0);
        qede_free_fp_array(edev);
 -      edev->num_rss = 0;
 +      edev->num_queues = 0;
 +      edev->fp_num_tx = 0;
 +      edev->fp_num_rx = 0;
  err0:
        return rc;
  }
@@@ -201,9 -201,14 +201,14 @@@ static const u16 sh_eth_offset_fast_rz[
  
        [ARSTR]         = 0x0000,
        [TSU_CTRST]     = 0x0004,
+       [TSU_FWSLC]     = 0x0038,
        [TSU_VTAG0]     = 0x0058,
        [TSU_ADSBSY]    = 0x0060,
        [TSU_TEN]       = 0x0064,
+       [TSU_POST1]     = 0x0070,
+       [TSU_POST2]     = 0x0074,
+       [TSU_POST3]     = 0x0078,
+       [TSU_POST4]     = 0x007c,
        [TSU_ADRH0]     = 0x0100,
  
        [TXNLCR0]       = 0x0080,
@@@ -1723,7 -1728,7 +1728,7 @@@ out
  static void sh_eth_adjust_link(struct net_device *ndev)
  {
        struct sh_eth_private *mdp = netdev_priv(ndev);
 -      struct phy_device *phydev = mdp->phydev;
 +      struct phy_device *phydev = ndev->phydev;
        int new_state = 0;
  
        if (phydev->link) {
@@@ -1800,48 -1805,51 +1805,48 @@@ static int sh_eth_phy_init(struct net_d
  
        phy_attached_info(phydev);
  
 -      mdp->phydev = phydev;
 -
        return 0;
  }
  
  /* PHY control start function */
  static int sh_eth_phy_start(struct net_device *ndev)
  {
 -      struct sh_eth_private *mdp = netdev_priv(ndev);
        int ret;
  
        ret = sh_eth_phy_init(ndev);
        if (ret)
                return ret;
  
 -      phy_start(mdp->phydev);
 +      phy_start(ndev->phydev);
  
        return 0;
  }
  
 -static int sh_eth_get_settings(struct net_device *ndev,
 -                             struct ethtool_cmd *ecmd)
 +static int sh_eth_get_link_ksettings(struct net_device *ndev,
 +                                   struct ethtool_link_ksettings *cmd)
  {
        struct sh_eth_private *mdp = netdev_priv(ndev);
        unsigned long flags;
        int ret;
  
 -      if (!mdp->phydev)
 +      if (!ndev->phydev)
                return -ENODEV;
  
        spin_lock_irqsave(&mdp->lock, flags);
 -      ret = phy_ethtool_gset(mdp->phydev, ecmd);
 +      ret = phy_ethtool_ksettings_get(ndev->phydev, cmd);
        spin_unlock_irqrestore(&mdp->lock, flags);
  
        return ret;
  }
  
 -static int sh_eth_set_settings(struct net_device *ndev,
 -                             struct ethtool_cmd *ecmd)
 +static int sh_eth_set_link_ksettings(struct net_device *ndev,
 +                                   const struct ethtool_link_ksettings *cmd)
  {
        struct sh_eth_private *mdp = netdev_priv(ndev);
        unsigned long flags;
        int ret;
  
 -      if (!mdp->phydev)
 +      if (!ndev->phydev)
                return -ENODEV;
  
        spin_lock_irqsave(&mdp->lock, flags);
        /* disable tx and rx */
        sh_eth_rcv_snd_disable(ndev);
  
 -      ret = phy_ethtool_sset(mdp->phydev, ecmd);
 +      ret = phy_ethtool_ksettings_set(ndev->phydev, cmd);
        if (ret)
                goto error_exit;
  
 -      if (ecmd->duplex == DUPLEX_FULL)
 +      if (cmd->base.duplex == DUPLEX_FULL)
                mdp->duplex = 1;
        else
                mdp->duplex = 0;
@@@ -2064,11 -2072,11 +2069,11 @@@ static int sh_eth_nway_reset(struct net
        unsigned long flags;
        int ret;
  
 -      if (!mdp->phydev)
 +      if (!ndev->phydev)
                return -ENODEV;
  
        spin_lock_irqsave(&mdp->lock, flags);
 -      ret = phy_start_aneg(mdp->phydev);
 +      ret = phy_start_aneg(ndev->phydev);
        spin_unlock_irqrestore(&mdp->lock, flags);
  
        return ret;
@@@ -2195,6 -2203,8 +2200,6 @@@ static int sh_eth_set_ringparam(struct 
  }
  
  static const struct ethtool_ops sh_eth_ethtool_ops = {
 -      .get_settings   = sh_eth_get_settings,
 -      .set_settings   = sh_eth_set_settings,
        .get_regs_len   = sh_eth_get_regs_len,
        .get_regs       = sh_eth_get_regs,
        .nway_reset     = sh_eth_nway_reset,
        .get_sset_count     = sh_eth_get_sset_count,
        .get_ringparam  = sh_eth_get_ringparam,
        .set_ringparam  = sh_eth_set_ringparam,
 +      .get_link_ksettings = sh_eth_get_link_ksettings,
 +      .set_link_ksettings = sh_eth_set_link_ksettings,
  };
  
  /* network device open function */
@@@ -2405,9 -2413,10 +2410,9 @@@ static int sh_eth_close(struct net_devi
        sh_eth_dev_exit(ndev);
  
        /* PHY Disconnect */
 -      if (mdp->phydev) {
 -              phy_stop(mdp->phydev);
 -              phy_disconnect(mdp->phydev);
 -              mdp->phydev = NULL;
 +      if (ndev->phydev) {
 +              phy_stop(ndev->phydev);
 +              phy_disconnect(ndev->phydev);
        }
  
        free_irq(ndev->irq, ndev);
  /* ioctl to device function */
  static int sh_eth_do_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
  {
 -      struct sh_eth_private *mdp = netdev_priv(ndev);
 -      struct phy_device *phydev = mdp->phydev;
 +      struct phy_device *phydev = ndev->phydev;
  
        if (!netif_running(ndev))
                return -EINVAL;
@@@ -2781,6 -2791,8 +2786,8 @@@ static void sh_eth_tsu_init(struct sh_e
  {
        if (sh_eth_is_rz_fast_ether(mdp)) {
                sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
+               sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
+                                TSU_FWSLC);    /* Enable POST registers */
                return;
        }
  
@@@ -62,7 -62,6 +62,7 @@@
  #include <linux/acpi.h>
  #include <linux/pm_runtime.h>
  #include <linux/property.h>
 +#include <linux/gpio/consumer.h>
  
  #include "smsc911x.h"
  
@@@ -148,9 -147,6 +148,9 @@@ struct smsc911x_data 
        /* regulators */
        struct regulator_bulk_data supplies[SMSC911X_NUM_SUPPLIES];
  
 +      /* Reset GPIO */
 +      struct gpio_desc *reset_gpiod;
 +
        /* clock */
        struct clk *clk;
  };
@@@ -442,11 -438,6 +442,11 @@@ static int smsc911x_request_resources(s
                netdev_err(ndev, "couldn't get regulators %d\n",
                                ret);
  
 +      /* Request optional RESET GPIO */
 +      pdata->reset_gpiod = devm_gpiod_get_optional(&pdev->dev,
 +                                                   "reset",
 +                                                   GPIOD_OUT_LOW);
 +
        /* Request clock */
        pdata->clk = clk_get(&pdev->dev, NULL);
        if (IS_ERR(pdata->clk))
@@@ -1108,15 -1099,8 +1108,8 @@@ static int smsc911x_mii_init(struct pla
                goto err_out_free_bus_2;
        }
  
-       if (smsc911x_mii_probe(dev) < 0) {
-               SMSC_WARN(pdata, probe, "Error registering mii bus");
-               goto err_out_unregister_bus_3;
-       }
        return 0;
  
- err_out_unregister_bus_3:
-       mdiobus_unregister(pdata->mii_bus);
  err_out_free_bus_2:
        mdiobus_free(pdata->mii_bus);
  err_out_1:
@@@ -1523,23 -1507,90 +1516,90 @@@ static void smsc911x_disable_irq_chip(s
        smsc911x_reg_write(pdata, INT_STS, 0xFFFFFFFF);
  }
  
+ static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id)
+ {
+       struct net_device *dev = dev_id;
+       struct smsc911x_data *pdata = netdev_priv(dev);
+       u32 intsts = smsc911x_reg_read(pdata, INT_STS);
+       u32 inten = smsc911x_reg_read(pdata, INT_EN);
+       int serviced = IRQ_NONE;
+       u32 temp;
+       if (unlikely(intsts & inten & INT_STS_SW_INT_)) {
+               temp = smsc911x_reg_read(pdata, INT_EN);
+               temp &= (~INT_EN_SW_INT_EN_);
+               smsc911x_reg_write(pdata, INT_EN, temp);
+               smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_);
+               pdata->software_irq_signal = 1;
+               smp_wmb();
+               serviced = IRQ_HANDLED;
+       }
+       if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) {
+               /* Called when there is a multicast update scheduled and
+                * it is now safe to complete the update */
+               SMSC_TRACE(pdata, intr, "RX Stop interrupt");
+               smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_);
+               if (pdata->multicast_update_pending)
+                       smsc911x_rx_multicast_update_workaround(pdata);
+               serviced = IRQ_HANDLED;
+       }
+       if (intsts & inten & INT_STS_TDFA_) {
+               temp = smsc911x_reg_read(pdata, FIFO_INT);
+               temp |= FIFO_INT_TX_AVAIL_LEVEL_;
+               smsc911x_reg_write(pdata, FIFO_INT, temp);
+               smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_);
+               netif_wake_queue(dev);
+               serviced = IRQ_HANDLED;
+       }
+       if (unlikely(intsts & inten & INT_STS_RXE_)) {
+               SMSC_TRACE(pdata, intr, "RX Error interrupt");
+               smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_);
+               serviced = IRQ_HANDLED;
+       }
+       if (likely(intsts & inten & INT_STS_RSFL_)) {
+               if (likely(napi_schedule_prep(&pdata->napi))) {
+                       /* Disable Rx interrupts */
+                       temp = smsc911x_reg_read(pdata, INT_EN);
+                       temp &= (~INT_EN_RSFL_EN_);
+                       smsc911x_reg_write(pdata, INT_EN, temp);
+                       /* Schedule a NAPI poll */
+                       __napi_schedule(&pdata->napi);
+               } else {
+                       SMSC_WARN(pdata, rx_err, "napi_schedule_prep failed");
+               }
+               serviced = IRQ_HANDLED;
+       }
+       return serviced;
+ }
  static int smsc911x_open(struct net_device *dev)
  {
        struct smsc911x_data *pdata = netdev_priv(dev);
        unsigned int timeout;
        unsigned int temp;
        unsigned int intcfg;
+       int retval;
+       int irq_flags;
  
-       /* if the phy is not yet registered, retry later*/
+       /* find and start the given phy */
        if (!dev->phydev) {
-               SMSC_WARN(pdata, hw, "phy_dev is NULL");
-               return -EAGAIN;
+               retval = smsc911x_mii_probe(dev);
+               if (retval < 0) {
+                       SMSC_WARN(pdata, probe, "Error starting phy");
+                       goto out;
+               }
        }
  
        /* Reset the LAN911x */
-       if (smsc911x_soft_reset(pdata)) {
+       retval = smsc911x_soft_reset(pdata);
+       if (retval) {
                SMSC_WARN(pdata, hw, "soft reset failed");
-               return -EIO;
+               goto mii_free_out;
        }
  
        smsc911x_reg_write(pdata, HW_CFG, 0x00050000);
        pdata->software_irq_signal = 0;
        smp_wmb();
  
+       irq_flags = irq_get_trigger_type(dev->irq);
+       retval = request_irq(dev->irq, smsc911x_irqhandler,
+                            irq_flags | IRQF_SHARED, dev->name, dev);
+       if (retval) {
+               SMSC_WARN(pdata, probe,
+                         "Unable to claim requested irq: %d", dev->irq);
+               goto mii_free_out;
+       }
        temp = smsc911x_reg_read(pdata, INT_EN);
        temp |= INT_EN_SW_INT_EN_;
        smsc911x_reg_write(pdata, INT_EN, temp);
        if (!pdata->software_irq_signal) {
                netdev_warn(dev, "ISR failed signaling test (IRQ %d)\n",
                            dev->irq);
-               return -ENODEV;
+               retval = -ENODEV;
+               goto irq_stop_out;
        }
        SMSC_TRACE(pdata, ifup, "IRQ handler passed test using IRQ %d",
                   dev->irq);
  
        netif_start_queue(dev);
        return 0;
+ irq_stop_out:
+       free_irq(dev->irq, dev);
+ mii_free_out:
+       phy_disconnect(dev->phydev);
+       dev->phydev = NULL;
+ out:
+       return retval;
  }
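
In the reworked smsc911x_open() above, the PHY is connected first and the interrupt is requested second, so the error path unwinds in reverse order: free the IRQ, then disconnect the PHY. A condensed sketch of that acquire/unwind ordering (the example_ helpers are hypothetical, not the driver's code):

static int example_open(struct net_device *dev)
{
	int ret;

	ret = example_connect_phy(dev);		/* hypothetical helper */
	if (ret)
		return ret;

	ret = request_irq(dev->irq, example_isr, IRQF_SHARED, dev->name, dev);
	if (ret)
		goto err_disconnect_phy;

	netif_start_queue(dev);
	return 0;

err_disconnect_phy:
	phy_disconnect(dev->phydev);
	dev->phydev = NULL;
	return ret;
}
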
  
  /* Entry point for stopping the interface */
@@@ -1676,9 -1745,15 +1754,15 @@@ static int smsc911x_stop(struct net_dev
        dev->stats.rx_dropped += smsc911x_reg_read(pdata, RX_DROP);
        smsc911x_tx_update_txcounters(dev);
  
+       free_irq(dev->irq, dev);
        /* Bring the PHY down */
-       if (dev->phydev)
+       if (dev->phydev) {
                phy_stop(dev->phydev);
+               phy_disconnect(dev->phydev);
+               dev->phydev = NULL;
+       }
+       netif_carrier_off(dev);
  
        SMSC_TRACE(pdata, ifdown, "Interface stopped");
        return 0;
@@@ -1820,67 -1895,6 +1904,6 @@@ static void smsc911x_set_multicast_list
        spin_unlock_irqrestore(&pdata->mac_lock, flags);
  }
  
- static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id)
- {
-       struct net_device *dev = dev_id;
-       struct smsc911x_data *pdata = netdev_priv(dev);
-       u32 intsts = smsc911x_reg_read(pdata, INT_STS);
-       u32 inten = smsc911x_reg_read(pdata, INT_EN);
-       int serviced = IRQ_NONE;
-       u32 temp;
-       if (unlikely(intsts & inten & INT_STS_SW_INT_)) {
-               temp = smsc911x_reg_read(pdata, INT_EN);
-               temp &= (~INT_EN_SW_INT_EN_);
-               smsc911x_reg_write(pdata, INT_EN, temp);
-               smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_);
-               pdata->software_irq_signal = 1;
-               smp_wmb();
-               serviced = IRQ_HANDLED;
-       }
-       if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) {
-               /* Called when there is a multicast update scheduled and
-                * it is now safe to complete the update */
-               SMSC_TRACE(pdata, intr, "RX Stop interrupt");
-               smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_);
-               if (pdata->multicast_update_pending)
-                       smsc911x_rx_multicast_update_workaround(pdata);
-               serviced = IRQ_HANDLED;
-       }
-       if (intsts & inten & INT_STS_TDFA_) {
-               temp = smsc911x_reg_read(pdata, FIFO_INT);
-               temp |= FIFO_INT_TX_AVAIL_LEVEL_;
-               smsc911x_reg_write(pdata, FIFO_INT, temp);
-               smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_);
-               netif_wake_queue(dev);
-               serviced = IRQ_HANDLED;
-       }
-       if (unlikely(intsts & inten & INT_STS_RXE_)) {
-               SMSC_TRACE(pdata, intr, "RX Error interrupt");
-               smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_);
-               serviced = IRQ_HANDLED;
-       }
-       if (likely(intsts & inten & INT_STS_RSFL_)) {
-               if (likely(napi_schedule_prep(&pdata->napi))) {
-                       /* Disable Rx interrupts */
-                       temp = smsc911x_reg_read(pdata, INT_EN);
-                       temp &= (~INT_EN_RSFL_EN_);
-                       smsc911x_reg_write(pdata, INT_EN, temp);
-                       /* Schedule a NAPI poll */
-                       __napi_schedule(&pdata->napi);
-               } else {
-                       SMSC_WARN(pdata, rx_err, "napi_schedule_prep failed");
-               }
-               serviced = IRQ_HANDLED;
-       }
-       return serviced;
- }
  #ifdef CONFIG_NET_POLL_CONTROLLER
  static void smsc911x_poll_controller(struct net_device *dev)
  {
@@@ -2300,16 -2314,14 +2323,14 @@@ static int smsc911x_drv_remove(struct p
        pdata = netdev_priv(dev);
        BUG_ON(!pdata);
        BUG_ON(!pdata->ioaddr);
-       BUG_ON(!dev->phydev);
+       WARN_ON(dev->phydev);
  
        SMSC_TRACE(pdata, ifdown, "Stopping driver");
  
-       phy_disconnect(dev->phydev);
        mdiobus_unregister(pdata->mii_bus);
        mdiobus_free(pdata->mii_bus);
  
        unregister_netdev(dev);
-       free_irq(dev->irq, dev);
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
                                           "smsc911x-memory");
        if (!res)
@@@ -2394,8 -2406,7 +2415,7 @@@ static int smsc911x_drv_probe(struct pl
        struct smsc911x_data *pdata;
        struct smsc911x_platform_config *config = dev_get_platdata(&pdev->dev);
        struct resource *res;
-       unsigned int intcfg = 0;
-       int res_size, irq, irq_flags;
+       int res_size, irq;
        int retval;
  
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
  
        pdata = netdev_priv(dev);
        dev->irq = irq;
-       irq_flags = irq_get_trigger_type(irq);
        pdata->ioaddr = ioremap_nocache(res->start, res_size);
  
        pdata->dev = dev;
        if (retval < 0)
                goto out_disable_resources;
  
-       /* configure irq polarity and type before connecting isr */
-       if (pdata->config.irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH)
-               intcfg |= INT_CFG_IRQ_POL_;
-       if (pdata->config.irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL)
-               intcfg |= INT_CFG_IRQ_TYPE_;
-       smsc911x_reg_write(pdata, INT_CFG, intcfg);
-       /* Ensure interrupts are globally disabled before connecting ISR */
-       smsc911x_disable_irq_chip(dev);
+       netif_carrier_off(dev);
  
-       retval = request_irq(dev->irq, smsc911x_irqhandler,
-                            irq_flags | IRQF_SHARED, dev->name, dev);
+       retval = smsc911x_mii_init(pdev, dev);
        if (retval) {
-               SMSC_WARN(pdata, probe,
-                         "Unable to claim requested irq: %d", dev->irq);
+               SMSC_WARN(pdata, probe, "Error %i initialising mii", retval);
                goto out_disable_resources;
        }
  
-       netif_carrier_off(dev);
        retval = register_netdev(dev);
        if (retval) {
                SMSC_WARN(pdata, probe, "Error %i registering device", retval);
-               goto out_free_irq;
+               goto out_disable_resources;
        } else {
                SMSC_TRACE(pdata, probe,
                           "Network interface: \"%s\"", dev->name);
        }
  
-       retval = smsc911x_mii_init(pdev, dev);
-       if (retval) {
-               SMSC_WARN(pdata, probe, "Error %i initialising mii", retval);
-               goto out_unregister_netdev_5;
-       }
        spin_lock_irq(&pdata->mac_lock);
  
        /* Check if mac address has been specified when bringing interface up */
  
        return 0;
  
- out_unregister_netdev_5:
-       unregister_netdev(dev);
- out_free_irq:
-       free_irq(dev->irq, dev);
  out_disable_resources:
        pm_runtime_put(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
@@@ -1246,7 -1246,7 +1246,7 @@@ static int dwceqos_mii_init(struct net_
        lp->mii_bus->read  = &dwceqos_mdio_read;
        lp->mii_bus->write = &dwceqos_mdio_write;
        lp->mii_bus->priv = lp;
-       lp->mii_bus->parent = &lp->ndev->dev;
+       lp->mii_bus->parent = &lp->pdev->dev;
  
        of_address_to_resource(lp->pdev->dev.of_node, 0, &res);
        snprintf(lp->mii_bus->id, MII_BUS_ID_SIZE, "%.8llx",
@@@ -2743,7 -2743,7 +2743,7 @@@ static void dwceqos_set_msglevel(struc
        lp->msg_enable = msglevel;
  }
  
 -static struct ethtool_ops dwceqos_ethtool_ops = {
 +static const struct ethtool_ops dwceqos_ethtool_ops = {
        .get_drvinfo    = dwceqos_get_drvinfo,
        .get_link       = ethtool_op_get_link,
        .get_pauseparam = dwceqos_get_pauseparam,
@@@ -2853,25 -2853,17 +2853,17 @@@ static int dwceqos_probe(struct platfor
  
        ndev->features = ndev->hw_features;
  
-       netif_napi_add(ndev, &lp->napi, dwceqos_rx_poll, NAPI_POLL_WEIGHT);
-       ret = register_netdev(ndev);
-       if (ret) {
-               dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-               goto err_out_clk_dis_aper;
-       }
        lp->phy_ref_clk = devm_clk_get(&pdev->dev, "phy_ref_clk");
        if (IS_ERR(lp->phy_ref_clk)) {
                dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
                ret = PTR_ERR(lp->phy_ref_clk);
-               goto err_out_unregister_netdev;
+               goto err_out_clk_dis_aper;
        }
  
        ret = clk_prepare_enable(lp->phy_ref_clk);
        if (ret) {
                dev_err(&pdev->dev, "Unable to enable device clock.\n");
-               goto err_out_unregister_netdev;
+               goto err_out_clk_dis_aper;
        }
  
        lp->phy_node = of_parse_phandle(lp->pdev->dev.of_node,
                ret = of_phy_register_fixed_link(lp->pdev->dev.of_node);
                if (ret < 0) {
                        dev_err(&pdev->dev, "invalid fixed-link");
-                       goto err_out_unregister_clk_notifier;
+                       goto err_out_clk_dis_phy;
                }
  
                lp->phy_node = of_node_get(lp->pdev->dev.of_node);
        ret = of_get_phy_mode(lp->pdev->dev.of_node);
        if (ret < 0) {
                dev_err(&lp->pdev->dev, "error in getting phy i/f\n");
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
  
        lp->phy_interface = ret;
        ret = dwceqos_mii_init(lp);
        if (ret) {
                dev_err(&lp->pdev->dev, "error in dwceqos_mii_init\n");
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
  
        ret = dwceqos_mii_probe(ndev);
        if (ret != 0) {
                netdev_err(ndev, "mii_probe fail.\n");
                ret = -ENXIO;
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
  
        dwceqos_set_umac_addr(lp, lp->ndev->dev_addr, 0);
        if (ret) {
                dev_err(&lp->pdev->dev, "Unable to retrieve DT, error %d\n",
                        ret);
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
        dev_info(&lp->pdev->dev, "pdev->id %d, baseaddr 0x%08lx, irq %d\n",
                 pdev->id, ndev->base_addr, ndev->irq);
        if (ret) {
                dev_err(&lp->pdev->dev, "Unable to request IRQ %d, error %d\n",
                        ndev->irq, ret);
-               goto err_out_unregister_clk_notifier;
+               goto err_out_clk_dis_phy;
        }
  
        if (netif_msg_probe(lp))
                netdev_dbg(ndev, "net_local@%p\n", lp);
  
+       netif_napi_add(ndev, &lp->napi, dwceqos_rx_poll, NAPI_POLL_WEIGHT);
+       ret = register_netdev(ndev);
+       if (ret) {
+               dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
+               goto err_out_clk_dis_phy;
+       }
        return 0;
  
- err_out_unregister_clk_notifier:
+ err_out_clk_dis_phy:
        clk_disable_unprepare(lp->phy_ref_clk);
- err_out_unregister_netdev:
-       unregister_netdev(ndev);
  err_out_clk_dis_aper:
        clk_disable_unprepare(lp->apb_pclk);
  err_out_free_netdev:
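
The dwceqos probe reordering above moves register_netdev() to the very end: once registration succeeds the core may invoke ndo_open immediately, so the clocks, PHY and MDIO bus must already be usable, and earlier failures can unwind without touching an exposed netdev. A schematic probe tail under those assumptions (example_priv and example_rx_poll stand in for the driver's private struct and NAPI poll function):

static int example_probe_finish(struct platform_device *pdev,
				struct net_device *ndev,
				struct example_priv *lp)
{
	int ret;

	/* clocks, PHY and MDIO are assumed to be fully set up here */
	netif_napi_add(ndev, &lp->napi, example_rx_poll, NAPI_POLL_WEIGHT);

	ret = register_netdev(ndev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
		clk_disable_unprepare(lp->phy_ref_clk);	/* unwind in reverse */
		clk_disable_unprepare(lp->apb_pclk);
	}
	return ret;
}
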
diff --combined drivers/net/phy/Kconfig
@@@ -15,218 -15,152 +15,218 @@@ if PHYLI
  config SWPHY
        bool
  
 -comment "MII PHY device drivers"
 -
 -config AQUANTIA_PHY
 -        tristate "Drivers for the Aquantia PHYs"
 -        ---help---
 -          Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
 +comment "MDIO bus device drivers"
  
 -config AT803X_PHY
 -      tristate "Drivers for Atheros AT803X PHYs"
 -      ---help---
 -        Currently supports the AT8030 and AT8035 model
 +config MDIO_BCM_IPROC
 +      tristate "Broadcom iProc MDIO bus controller"
 +      depends on ARCH_BCM_IPROC || COMPILE_TEST
 +      depends on HAS_IOMEM && OF_MDIO
 +      help
 +        This module provides a driver for the MDIO busses found in the
 +        Broadcom iProc SoCs.
  
 -config AMD_PHY
 -      tristate "Drivers for the AMD PHYs"
 -      ---help---
 -        Currently supports the am79c874
 +config MDIO_BCM_UNIMAC
 +      tristate "Broadcom UniMAC MDIO bus controller"
 +      depends on HAS_IOMEM
 +      help
 +        This module provides a driver for the Broadcom UniMAC MDIO busses.
 +        This hardware can be found in the Broadcom GENET Ethernet MAC
 +        controllers as well as some Broadcom Ethernet switches such as the
 +        Starfighter 2 switches.
  
 -config MARVELL_PHY
 -      tristate "Drivers for Marvell PHYs"
 -      ---help---
 -        Currently has a driver for the 88E1011S
 -      
 -config DAVICOM_PHY
 -      tristate "Drivers for Davicom PHYs"
 -      ---help---
 -        Currently supports dm9161e and dm9131
 +config MDIO_BITBANG
 +      tristate "Bitbanged MDIO buses"
 +      help
 +        This module implements the MDIO bus protocol in software,
 +        for use by low level drivers that export the ability to
 +        drive the relevant pins.
  
 -config QSEMI_PHY
 -      tristate "Drivers for Quality Semiconductor PHYs"
 -      ---help---
 -        Currently supports the qs6612
 +        If in doubt, say N.
  
 -config LXT_PHY
 -      tristate "Drivers for the Intel LXT PHYs"
 -      ---help---
 -        Currently supports the lxt970, lxt971
 +config MDIO_BUS_MUX
 +      tristate
 +      depends on OF_MDIO
 +      help
 +        This module provides a driver framework for MDIO bus
 +        multiplexers which connect one of several child MDIO busses
 +        to a parent bus.  Switching between child busses is done by
 +        device specific drivers.
  
 -config CICADA_PHY
 -      tristate "Drivers for the Cicada PHYs"
 -      ---help---
 -        Currently supports the cis8204
 +config MDIO_BUS_MUX_BCM_IPROC
 +      tristate "Broadcom iProc based MDIO bus multiplexers"
 +      depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
 +      select MDIO_BUS_MUX
 +      default ARCH_BCM_IPROC
 +      help
 +        This module provides a driver for MDIO bus multiplexers found in
 +        iProc based Broadcom SoCs. This multiplexer connects one of several
 +        child MDIO buses to a parent bus. The buses can be internal as well as
 +        external, and the selection logic lies inside the same multiplexer.
  
 -config VITESSE_PHY
 -        tristate "Drivers for the Vitesse PHYs"
 -        ---help---
 -          Currently supports the vsc8244
 +config MDIO_BUS_MUX_GPIO
 +      tristate "GPIO controlled MDIO bus multiplexers"
 +      depends on OF_GPIO && OF_MDIO
 +      select MDIO_BUS_MUX
 +      help
 +        This module provides a driver for MDIO bus multiplexers that
 +        are controlled via GPIO lines.  The multiplexer connects one of
 +        several child MDIO busses to a parent bus.  Child bus
 +        selection is under the control of GPIO lines.
  
 -config TERANETICS_PHY
 -        tristate "Drivers for the Teranetics PHYs"
 -        ---help---
 -          Currently supports the Teranetics TN2020
 +config MDIO_BUS_MUX_MMIOREG
 +      tristate "MMIO device-controlled MDIO bus multiplexers"
 +      depends on OF_MDIO && HAS_IOMEM
 +      select MDIO_BUS_MUX
 +      help
 +        This module provides a driver for MDIO bus multiplexers that
 +        are controlled via a simple memory-mapped device, like an FPGA.
 +        The multiplexer connects one of several child MDIO busses to a
 +        parent bus.  Child bus selection is under the control of one of
 +        the FPGA's registers.
  
 -config SMSC_PHY
 -      tristate "Drivers for SMSC PHYs"
 -      ---help---
 -        Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs
 +        Currently, only 8-bit registers are supported.
  
 -config BCM_NET_PHYLIB
 +config MDIO_CAVIUM
        tristate
  
 -config BROADCOM_PHY
 -      tristate "Drivers for Broadcom PHYs"
 -      select BCM_NET_PHYLIB
 +config MDIO_GPIO
 +      tristate "GPIO lib-based bitbanged MDIO buses"
 +      depends on MDIO_BITBANG && GPIOLIB
        ---help---
 -        Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
 -        BCM5481 and BCM5482 PHYs.
 +        Supports GPIO lib-based MDIO busses.
  
 -config BCM_CYGNUS_PHY
 -      tristate "Drivers for Broadcom Cygnus SoC internal PHY"
 -      depends on ARCH_BCM_CYGNUS || COMPILE_TEST
 -      depends on MDIO_BCM_IPROC
 -      select BCM_NET_PHYLIB
 +        To compile this driver as a module, choose M here: the module
 +        will be called mdio-gpio.
 +
 +config MDIO_HISI_FEMAC
 +      tristate "Hisilicon FEMAC MDIO bus controller"
 +      depends on HAS_IOMEM && OF_MDIO
 +      help
 +        This module provides a driver for the MDIO busses found in the
 +        Hisilicon SoCs that have a Fast Ethernet MAC.
 +
 +config MDIO_MOXART
 +        tristate "MOXA ART MDIO interface support"
 +        depends on ARCH_MOXART
 +        help
 +          This driver supports the MDIO interface found in the network
 +          interface units of the MOXA ART SoC
 +
 +config MDIO_OCTEON
 +      tristate "Octeon and some ThunderX SOCs MDIO buses"
 +      depends on 64BIT
 +      depends on HAS_IOMEM
 +      select MDIO_CAVIUM
 +      help
 +        This module provides a driver for the Octeon and ThunderX MDIO
 +        buses. It is required by the Octeon and ThunderX ethernet device
 +        drivers on some systems.
 +
 +config MDIO_SUN4I
 +      tristate "Allwinner sun4i MDIO interface support"
 +      depends on ARCH_SUNXI
 +      help
 +        This driver supports the MDIO interface found in the network
 +        interface units of the Allwinner SoCs that have an EMAC (A10,
 +        A12, A10s, etc.)
 +
 +config MDIO_THUNDER
 +      tristate "ThunderX SOCs MDIO buses"
 +      depends on 64BIT
 +      depends on PCI
 +      select MDIO_CAVIUM
 +      help
 +        This driver supports the MDIO interfaces found on Cavium
 +        ThunderX SoCs when the MDIO bus device appears as a PCI
 +        device.
 +
 +config MDIO_XGENE
 +      tristate "APM X-Gene SoC MDIO bus controller"
 +      help
 +        This module provides a driver for the MDIO busses found in the
 +        APM X-Gene SoCs.
 +
 +comment "MII PHY device drivers"
 +
 +config AMD_PHY
 +      tristate "AMD PHYs"
        ---help---
 -        This PHY driver is for the 1G internal PHYs of the Broadcom
 -        Cygnus Family SoC.
 +        Currently supports the am79c874
  
 -        Currently supports internal PHY's used in the BCM11300,
 -        BCM11320, BCM11350, BCM11360, BCM58300, BCM58302,
 -        BCM58303 & BCM58305 Broadcom Cygnus SoCs.
 +config AQUANTIA_PHY
 +        tristate "Aquantia PHYs"
 +        ---help---
 +          Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
 +
 +config AT803X_PHY
 +      tristate "AT803X PHYs"
 +      ---help---
 +        Currently supports the AT8030 and AT8035 model
  
  config BCM63XX_PHY
 -      tristate "Drivers for Broadcom 63xx SOCs internal PHY"
 +      tristate "Broadcom 63xx SOCs internal PHY"
        depends on BCM63XX
        select BCM_NET_PHYLIB
        ---help---
          Currently supports the 6348 and 6358 PHYs.
  
  config BCM7XXX_PHY
 -      tristate "Drivers for Broadcom 7xxx SOCs internal PHYs"
 +      tristate "Broadcom 7xxx SOCs internal PHYs"
        select BCM_NET_PHYLIB
        ---help---
          Currently supports the BCM7366, BCM7439, BCM7445, and
          40nm and 65nm generation of BCM7xxx Set Top Box SoCs.
  
  config BCM87XX_PHY
 -      tristate "Driver for Broadcom BCM8706 and BCM8727 PHYs"
 +      tristate "Broadcom BCM8706 and BCM8727 PHYs"
        help
          Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs.
  
 -config ICPLUS_PHY
 -      tristate "Drivers for ICPlus PHYs"
 +config BCM_CYGNUS_PHY
 +      tristate "Broadcom Cygnus SoC internal PHY"
 +      depends on ARCH_BCM_CYGNUS || COMPILE_TEST
 +      depends on MDIO_BCM_IPROC
 +      select BCM_NET_PHYLIB
        ---help---
 -        Currently supports the IP175C and IP1001 PHYs.
 +        This PHY driver is for the 1G internal PHYs of the Broadcom
 +        Cygnus Family SoC.
  
 -config REALTEK_PHY
 -      tristate "Drivers for Realtek PHYs"
 -      ---help---
 -        Supports the Realtek 821x PHY.
 +        Currently supports internal PHYs used in the BCM11300,
 +        BCM11320, BCM11350, BCM11360, BCM58300, BCM58302,
 +        BCM58303 & BCM58305 Broadcom Cygnus SoCs.
  
 -config NATIONAL_PHY
 -      tristate "Drivers for National Semiconductor PHYs"
 -      ---help---
 -        Currently supports the DP83865 PHY.
 +config BCM_NET_PHYLIB
 +      tristate
  
 -config STE10XP
 -      tristate "Driver for STMicroelectronics STe10Xp PHYs"
 +config BROADCOM_PHY
 +      tristate "Broadcom PHYs"
 +      select BCM_NET_PHYLIB
        ---help---
 -        This is the driver for the STe100p and STe101p PHYs.
 +        Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
 +        BCM5481 and BCM5482 PHYs.
  
 -config LSI_ET1011C_PHY
 -      tristate "Driver for LSI ET1011C PHY"
 +config CICADA_PHY
 +      tristate "Cicada PHYs"
        ---help---
 -        Supports the LSI ET1011C PHY.
 +        Currently supports the cis8204
  
 -config MICREL_PHY
 -      tristate "Driver for Micrel PHYs"
 +config DAVICOM_PHY
 +      tristate "Davicom PHYs"
        ---help---
 -        Supports the KSZ9021, VSC8201, KS8001 PHYs.
 +        Currently supports dm9161e and dm9131
  
  config DP83848_PHY
 -      tristate "Driver for Texas Instruments DP83848 PHY"
 +      tristate "Texas Instruments DP83848 PHY"
        ---help---
          Supports the DP83848 PHY.
  
  config DP83867_PHY
 -      tristate "Drivers for Texas Instruments DP83867 Gigabit PHY"
 +      tristate "Texas Instruments DP83867 Gigabit PHY"
        ---help---
          Currently supports the DP83867 PHY.
  
 -config MICROCHIP_PHY
 -      tristate "Drivers for Microchip PHYs"
 -      help
 -        Supports the LAN88XX PHYs.
 -
  config FIXED_PHY
 -      tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
 +      tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
        depends on PHYLIB
        select SWPHY
        ---help---
  
          Currently tested with mpc866ads and mpc8349e-mitx.
  
 -config MDIO_BITBANG
 -      tristate "Support for bitbanged MDIO buses"
 -      help
 -        This module implements the MDIO bus protocol in software,
 -        for use by low level drivers that export the ability to
 -        drive the relevant pins.
 -
 -        If in doubt, say N.
 -
 -config MDIO_GPIO
 -      tristate "Support for GPIO lib-based bitbanged MDIO buses"
 -      depends on MDIO_BITBANG && GPIOLIB
 +config ICPLUS_PHY
 +      tristate "ICPlus PHYs"
        ---help---
 -        Supports GPIO lib-based MDIO busses.
 +        Currently supports the IP175C and IP1001 PHYs.
  
 -        To compile this driver as a module, choose M here: the module
 -        will be called mdio-gpio.
 +config INTEL_XWAY_PHY
 +      tristate "Intel XWAY PHYs"
 +      ---help---
 +        Supports the Intel XWAY (formerly Lantiq) 11G and 22E PHYs.
 +        These PHYs are marked as standalone chips under the names
 +        PEF 7061, PEF 7071 and PEF 7072 or integrated into the Intel
 +        SoCs xRX200, xRX300, xRX330, xRX350 and xRX550.
  
 -config MDIO_CAVIUM
 -      tristate
 +config LSI_ET1011C_PHY
 +      tristate "LSI ET1011C PHY"
 +      ---help---
 +        Supports the LSI ET1011C PHY.
  
 -config MDIO_OCTEON
 -      tristate "Support for MDIO buses on Octeon and some ThunderX SOCs"
 -      depends on 64BIT
 -      depends on HAS_IOMEM
 -      select MDIO_CAVIUM
 -      help
 -        This module provides a driver for the Octeon and ThunderX MDIO
 -        buses. It is required by the Octeon and ThunderX ethernet device
 -        drivers on some systems.
 +config LXT_PHY
 +      tristate "Intel LXT PHYs"
 +      ---help---
 +        Currently supports the lxt970, lxt971
  
 -config MDIO_THUNDER
 -      tristate "Support for MDIO buses on ThunderX SOCs"
 -      depends on 64BIT
 -      depends on PCI
 -      select MDIO_CAVIUM
 -      help
 -        This driver supports the MDIO interfaces found on Cavium
 -        ThunderX SoCs when the MDIO bus device appears as a PCI
 -        device.
 +config MARVELL_PHY
 +      tristate "Marvell PHYs"
 +      ---help---
 +        Currently has a driver for the 88E1011S
  
 +config MICREL_PHY
 +      tristate "Micrel PHYs"
 +      ---help---
 +        Supports the KSZ9021, VSC8201, KS8001 PHYs.
  
 -config MDIO_SUN4I
 -      tristate "Allwinner sun4i MDIO interface support"
 -      depends on ARCH_SUNXI
 +config MICROCHIP_PHY
 +      tristate "Microchip PHYs"
        help
 -        This driver supports the MDIO interface found in the network
 -        interface units of the Allwinner SoC that have an EMAC (A10,
 -        A12, A10s, etc.)
 -
 -config MDIO_MOXART
 -        tristate "MOXA ART MDIO interface support"
 -        depends on ARCH_MOXART
 -        help
 -          This driver supports the MDIO interface found in the network
 -          interface units of the MOXA ART SoC
 +        Supports the LAN88XX PHYs.
  
 -config MDIO_BUS_MUX
 -      tristate
 -      depends on OF_MDIO
 -      help
 -        This module provides a driver framework for MDIO bus
 -        multiplexers which connect one of several child MDIO busses
 -        to a parent bus.  Switching between child busses is done by
 -        device specific drivers.
 +config MICROSEMI_PHY
 +      tristate "Microsemi PHYs"
 +      ---help---
 +        Currently supports the VSC8531 and VSC8541 PHYs
  
 -config MDIO_BUS_MUX_GPIO
 -      tristate "Support for GPIO controlled MDIO bus multiplexers"
 -      depends on OF_GPIO && OF_MDIO
 -      select MDIO_BUS_MUX
 -      help
 -        This module provides a driver for MDIO bus multiplexers that
 -        are controlled via GPIO lines.  The multiplexer connects one of
 -        several child MDIO busses to a parent bus.  Child bus
 -        selection is under the control of GPIO lines.
 +config NATIONAL_PHY
 +      tristate "National Semiconductor PHYs"
 +      ---help---
 +        Currently supports the DP83865 PHY.
  
 -config MDIO_BUS_MUX_MMIOREG
 -      tristate "Support for MMIO device-controlled MDIO bus multiplexers"
 -      depends on OF_MDIO && HAS_IOMEM
 -      select MDIO_BUS_MUX
 -      help
 -        This module provides a driver for MDIO bus multiplexers that
 -        are controlled via a simple memory-mapped device, like an FPGA.
 -        The multiplexer connects one of several child MDIO busses to a
 -        parent bus.  Child bus selection is under the control of one of
 -        the FPGA's registers.
 +config QSEMI_PHY
 +      tristate "Quality Semiconductor PHYs"
 +      ---help---
 +        Currently supports the qs6612
  
 -        Currently, only 8-bit registers are supported.
 +config REALTEK_PHY
 +      tristate "Realtek PHYs"
 +      ---help---
 +        Supports the Realtek 821x PHY.
  
 -config MDIO_BUS_MUX_BCM_IPROC
 -      tristate "Support for iProc based MDIO bus multiplexers"
 -      depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
 -      select MDIO_BUS_MUX
 -      default ARCH_BCM_IPROC
 -      help
 -        This module provides a driver for MDIO bus multiplexers found in
 -        iProc based Broadcom SoCs. This multiplexer connects one of several
 -        child MDIO bus to a parent bus. Buses could be internal as well as
 -        external and selection logic lies inside the same multiplexer.
 +config SMSC_PHY
 +      tristate "SMSC PHYs"
 +      ---help---
 +        Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs
  
 -config MDIO_BCM_UNIMAC
 -      tristate "Broadcom UniMAC MDIO bus controller"
 -      depends on HAS_IOMEM
 -      help
 -        This module provides a driver for the Broadcom UniMAC MDIO busses.
 -        This hardware can be found in the Broadcom GENET Ethernet MAC
 -        controllers as well as some Broadcom Ethernet switches such as the
 -        Starfighter 2 switches.
 +config STE10XP
 +      tristate "STMicroelectronics STe10Xp PHYs"
 +      ---help---
 +        This is the driver for the STe100p and STe101p PHYs.
  
 -config MDIO_BCM_IPROC
 -      tristate "Broadcom iProc MDIO bus controller"
 -      depends on ARCH_BCM_IPROC || COMPILE_TEST
 -      depends on HAS_IOMEM && OF_MDIO
 -      help
 -        This module provides a driver for the MDIO busses found in the
 -        Broadcom iProc SoC's.
 +config TERANETICS_PHY
 +        tristate "Teranetics PHYs"
 +        ---help---
 +          Currently supports the Teranetics TN2020
  
 -config INTEL_XWAY_PHY
 -      tristate "Driver for Intel XWAY PHYs"
 -      ---help---
 -        Supports the Intel XWAY (former Lantiq) 11G and 22E PHYs.
 -        These PHYs are marked as standalone chips under the names
 -        PEF 7061, PEF 7071 and PEF 7072 or integrated into the Intel
 -        SoCs xRX200, xRX300, xRX330, xRX350 and xRX550.
 +config VITESSE_PHY
 +        tristate "Vitesse PHYs"
 +        ---help---
 +          Currently supports the vsc8244
  
 -config MDIO_HISI_FEMAC
 -      tristate "Hisilicon FEMAC MDIO bus controller"
 -      depends on HAS_IOMEM && OF_MDIO
 -      help
 -        This module provides a driver for the MDIO busses found in the
 -        Hisilicon SoC that have an Fast Ethernet MAC.
 +config XILINX_GMII2RGMII
 +       tristate "Xilinx GMII2RGMII converter driver"
 +       ---help---
 +         This driver supports the Xilinx GMII to RGMII IP core. It provides
 +         the Reduced Gigabit Media Independent Interface (RGMII) between
 +         Ethernet physical media devices and the Gigabit Ethernet controller.
  
+ config MDIO_XGENE
+       tristate "APM X-Gene SoC MDIO bus controller"
+       depends on ARCH_XGENE || COMPILE_TEST
+       help
+         This module provides a driver for the MDIO busses found in the
+         APM X-Gene SoCs.
  endif # PHYLIB
  
  config MICREL_KS8995MA
diff --combined drivers/net/vxlan.c
@@@ -27,6 -27,7 +27,6 @@@
  #include <net/net_namespace.h>
  #include <net/netns/generic.h>
  #include <net/vxlan.h>
 -#include <net/protocol.h>
  
  #if IS_ENABLED(CONFIG_IPV6)
  #include <net/ip6_tunnel.h>
@@@ -287,7 -288,7 +287,7 @@@ static int vxlan_fdb_info(struct sk_buf
  
        if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
            nla_put_s32(skb, NDA_LINK_NETNSID,
 -                      peernet2id_alloc(dev_net(vxlan->dev), vxlan->net)))
 +                      peernet2id(dev_net(vxlan->dev), vxlan->net)))
                goto nla_put_failure;
  
        if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
@@@ -860,20 -861,20 +860,20 @@@ out
  /* Dump forwarding table */
  static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                          struct net_device *dev,
 -                        struct net_device *filter_dev, int idx)
 +                        struct net_device *filter_dev, int *idx)
  {
        struct vxlan_dev *vxlan = netdev_priv(dev);
        unsigned int h;
 +      int err = 0;
  
        for (h = 0; h < FDB_HASH_SIZE; ++h) {
                struct vxlan_fdb *f;
 -              int err;
  
                hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
                        struct vxlan_rdst *rd;
  
                        list_for_each_entry_rcu(rd, &f->remotes, list) {
 -                              if (idx < cb->args[0])
 +                              if (*idx < cb->args[2])
                                        goto skip;
  
                                err = vxlan_fdb_info(skb, vxlan, f,
                                                     cb->nlh->nlmsg_seq,
                                                     RTM_NEWNEIGH,
                                                     NLM_F_MULTI, rd);
 -                              if (err < 0) {
 -                                      cb->args[1] = err;
 +                              if (err < 0)
                                        goto out;
 -                              }
  skip:
 -                              ++idx;
 +                              *idx += 1;
                        }
                }
        }
  out:
 -      return idx;
 +      return err;
  }
  
  /* Watch incoming packets to learn mapping between Ethernet address
@@@ -1291,7 -1294,7 +1291,7 @@@ static int vxlan_rcv(struct sock *sk, s
                struct metadata_dst *tun_dst;
  
                tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
 -                                       vxlan_vni_to_tun_id(vni), sizeof(*md));
 +                                       key32_to_tunnel_id(vni), sizeof(*md));
  
                if (!tun_dst)
                        goto drop;
@@@ -1945,7 -1948,7 +1945,7 @@@ static void vxlan_xmit_one(struct sk_bu
                        goto drop;
                }
                dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
 -              vni = vxlan_tun_id_to_vni(info->key.tun_id);
 +              vni = tunnel_id_to_key32(info->key.tun_id);
                remote_ip.sa.sa_family = ip_tunnel_info_af(info);
                if (remote_ip.sa.sa_family == AF_INET) {
                        remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
                                      vni, md, flags, udp_sum);
                if (err < 0) {
                        dst_release(ndst);
 +                      dev->stats.tx_errors++;
                        return;
                }
                udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
@@@ -2780,14 -2782,15 +2780,15 @@@ static int vxlan_dev_configure(struct n
        struct net_device *lowerdev = NULL;
  
        if (conf->flags & VXLAN_F_GPE) {
-               if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
-                       return -EINVAL;
                /* For now, allow GPE only together with COLLECT_METADATA.
                 * This can be relaxed later; in such case, the other side
                 * of the PtP link will have to be provided.
                 */
-               if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
+               if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
+                   !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+                       pr_info("unsupported combination of extensions\n");
                        return -EINVAL;
+               }
  
                vxlan_raw_setup(dev);
        } else {
                        dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
  
                needed_headroom = lowerdev->hard_header_len;
+       } else if (vxlan_addr_multicast(&dst->remote_ip)) {
+               pr_info("multicast destination requires interface to be specified\n");
+               return -EINVAL;
        }
  
        if (conf->mtu) {
                     tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 &&
                    tmp->cfg.dst_port == vxlan->cfg.dst_port &&
                    (tmp->flags & VXLAN_F_RCV_FLAGS) ==
-                   (vxlan->flags & VXLAN_F_RCV_FLAGS))
-               return -EEXIST;
+                   (vxlan->flags & VXLAN_F_RCV_FLAGS)) {
+                       pr_info("duplicate VNI %u\n", be32_to_cpu(conf->vni));
+                       return -EEXIST;
+               }
        }
  
        dev->ethtool_ops = &vxlan_ethtool_ops;
@@@ -2907,7 -2915,6 +2913,6 @@@ static int vxlan_newlink(struct net *sr
                         struct nlattr *tb[], struct nlattr *data[])
  {
        struct vxlan_config conf;
-       int err;
  
        memset(&conf, 0, sizeof(conf));
  
        if (tb[IFLA_MTU])
                conf.mtu = nla_get_u32(tb[IFLA_MTU]);
  
-       err = vxlan_dev_configure(src_net, dev, &conf);
-       switch (err) {
-       case -ENODEV:
-               pr_info("ifindex %d does not exist\n", conf.remote_ifindex);
-               break;
-       case -EPERM:
-               pr_info("IPv6 is disabled via sysctl\n");
-               break;
-       case -EEXIST:
-               pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
-               break;
-       case -EINVAL:
-               pr_info("unsupported combination of extensions\n");
-               break;
-       }
-       return err;
+       return vxlan_dev_configure(src_net, dev, &conf);
  }
  
  static void vxlan_dellink(struct net_device *dev, struct list_head *head)
@@@ -152,7 -152,7 +152,7 @@@ enum 
        MLX5_CMD_OP_CONFIG_INT_MODERATION         = 0x804,
        MLX5_CMD_OP_ACCESS_REG                    = 0x805,
        MLX5_CMD_OP_ATTACH_TO_MCG                 = 0x806,
 -      MLX5_CMD_OP_DETTACH_FROM_MCG              = 0x807,
 +      MLX5_CMD_OP_DETACH_FROM_MCG               = 0x807,
        MLX5_CMD_OP_GET_DROPPED_PACKET_LOG        = 0x80a,
        MLX5_CMD_OP_MAD_IFC                       = 0x50d,
        MLX5_CMD_OP_QUERY_MAD_DEMUX               = 0x80b,
        MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY         = 0x82b,
        MLX5_CMD_OP_SET_WOL_ROL                   = 0x830,
        MLX5_CMD_OP_QUERY_WOL_ROL                 = 0x831,
 +      MLX5_CMD_OP_CREATE_LAG                    = 0x840,
 +      MLX5_CMD_OP_MODIFY_LAG                    = 0x841,
 +      MLX5_CMD_OP_QUERY_LAG                     = 0x842,
 +      MLX5_CMD_OP_DESTROY_LAG                   = 0x843,
 +      MLX5_CMD_OP_CREATE_VPORT_LAG              = 0x844,
 +      MLX5_CMD_OP_DESTROY_VPORT_LAG             = 0x845,
        MLX5_CMD_OP_CREATE_TIR                    = 0x900,
        MLX5_CMD_OP_MODIFY_TIR                    = 0x901,
        MLX5_CMD_OP_DESTROY_TIR                   = 0x902,
        MLX5_CMD_OP_DEALLOC_FLOW_COUNTER          = 0x93a,
        MLX5_CMD_OP_QUERY_FLOW_COUNTER            = 0x93b,
        MLX5_CMD_OP_MODIFY_FLOW_TABLE             = 0x93c,
 +      MLX5_CMD_OP_ALLOC_ENCAP_HEADER            = 0x93d,
 +      MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
        MLX5_CMD_OP_MAX
  };
  
@@@ -289,9 -281,7 +289,9 @@@ struct mlx5_ifc_flow_table_prop_layout_
        u8         modify_root[0x1];
        u8         identified_miss_table_mode[0x1];
        u8         flow_table_modify[0x1];
 -      u8         reserved_at_7[0x19];
 +      u8         encap[0x1];
 +      u8         decap[0x1];
 +      u8         reserved_at_9[0x17];
  
        u8         reserved_at_20[0x2];
        u8         log_max_ft_size[0x6];
@@@ -483,9 -473,7 +483,9 @@@ struct mlx5_ifc_ads_bits 
  
  struct mlx5_ifc_flow_table_nic_cap_bits {
        u8         nic_rx_multi_path_tirs[0x1];
 -      u8         reserved_at_1[0x1ff];
 +      u8         nic_rx_multi_path_tirs_fts[0x1];
 +      u8         allow_sniffer_and_nic_rx_shared_tir[0x1];
 +      u8         reserved_at_3[0x1fd];
  
        struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
  
@@@ -524,15 -512,7 +524,15 @@@ struct mlx5_ifc_e_switch_cap_bits 
        u8         nic_vport_node_guid_modify[0x1];
        u8         nic_vport_port_guid_modify[0x1];
  
 -      u8         reserved_at_20[0x7e0];
 +      u8         vxlan_encap_decap[0x1];
 +      u8         nvgre_encap_decap[0x1];
 +      u8         reserved_at_22[0x9];
 +      u8         log_max_encap_headers[0x5];
 +      u8         reserved_2b[0x6];
 +      u8         max_encap_header_size[0xa];
 +
 +      u8         reserved_40[0x7c0];
 +
  };
  
  struct mlx5_ifc_qos_cap_bits {
@@@ -787,9 -767,7 +787,9 @@@ struct mlx5_ifc_cmd_hca_cap_bits 
        u8         out_of_seq_cnt[0x1];
        u8         vport_counters[0x1];
        u8         retransmission_q_counters[0x1];
 -      u8         reserved_at_183[0x3];
 +      u8         reserved_at_183[0x1];
 +      u8         modify_rq_counter_set_id[0x1];
 +      u8         reserved_at_185[0x1];
        u8         max_qp_cnt[0xa];
        u8         pkey_table_size[0x10];
  
        u8         pad_tx_eth_packet[0x1];
        u8         reserved_at_263[0x8];
        u8         log_bf_reg_size[0x5];
 -      u8         reserved_at_270[0x10];
 +
 +      u8         reserved_at_270[0xb];
 +      u8         lag_master[0x1];
 +      u8         num_lag_ports[0x4];
  
        u8         reserved_at_280[0x10];
        u8         max_wqe_sz_sq[0x10];
@@@ -1929,7 -1904,7 +1929,7 @@@ enum 
  
  struct mlx5_ifc_qpc_bits {
        u8         state[0x4];
 -      u8         reserved_at_4[0x4];
 +      u8         lag_tx_port_affinity[0x4];
        u8         st[0x8];
        u8         reserved_at_10[0x3];
        u8         pm_state[0x2];
        u8         reserved_at_3e0[0x8];
        u8         cqn_snd[0x18];
  
 -      u8         reserved_at_400[0x40];
 +      u8         reserved_at_400[0x8];
 +      u8         deth_sqpn[0x18];
 +
 +      u8         reserved_at_420[0x20];
  
        u8         reserved_at_440[0x8];
        u8         last_acked_psn[0x18];
@@@ -2092,8 -2064,6 +2092,8 @@@ enum 
        MLX5_FLOW_CONTEXT_ACTION_DROP      = 0x2,
        MLX5_FLOW_CONTEXT_ACTION_FWD_DEST  = 0x4,
        MLX5_FLOW_CONTEXT_ACTION_COUNT     = 0x8,
 +      MLX5_FLOW_CONTEXT_ACTION_ENCAP     = 0x10,
 +      MLX5_FLOW_CONTEXT_ACTION_DECAP     = 0x20,
  };
  
  struct mlx5_ifc_flow_context_bits {
        u8         reserved_at_a0[0x8];
        u8         flow_counter_list_size[0x18];
  
 -      u8         reserved_at_c0[0x140];
 +      u8         encap_id[0x20];
 +
 +      u8         reserved_at_e0[0x120];
  
        struct mlx5_ifc_fte_match_param_bits match_value;
  
@@@ -2178,11 -2146,7 +2178,11 @@@ struct mlx5_ifc_traffic_counter_bits 
  };
  
  struct mlx5_ifc_tisc_bits {
 -      u8         reserved_at_0[0xc];
 +      u8         strict_lag_tx_port_affinity[0x1];
 +      u8         reserved_at_1[0x3];
 +      u8         lag_tx_port_affinity[0x04];
 +
 +      u8         reserved_at_8[0x4];
        u8         prio[0x4];
        u8         reserved_at_10[0x10];
  
@@@ -2844,7 -2808,7 +2844,7 @@@ struct mlx5_ifc_xrqc_bits 
  
        struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context;
  
 -      u8         reserved_at_180[0x180];
 +      u8         reserved_at_180[0x200];
  
        struct mlx5_ifc_wq_bits wq;
  };
@@@ -3525,7 -3489,7 +3525,7 @@@ struct mlx5_ifc_query_special_contexts_
  
        u8         syndrome[0x20];
  
 -      u8         reserved_at_40[0x20];
 +      u8         dump_fill_mkey[0x20];
  
        u8         resd_lkey[0x20];
  };
@@@ -4249,85 -4213,6 +4249,85 @@@ struct mlx5_ifc_query_eq_in_bits 
        u8         reserved_at_60[0x20];
  };
  
 +struct mlx5_ifc_encap_header_in_bits {
 +      u8         reserved_at_0[0x5];
 +      u8         header_type[0x3];
 +      u8         reserved_at_8[0xe];
 +      u8         encap_header_size[0xa];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         encap_header[2][0x8];
 +
 +      u8         more_encap_header[0][0x8];
 +};
 +
 +struct mlx5_ifc_query_encap_header_out_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         reserved_at_40[0xa0];
 +
 +      struct mlx5_ifc_encap_header_in_bits encap_header[0];
 +};
 +
 +struct mlx5_ifc_query_encap_header_in_bits {
 +      u8         opcode[0x10];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         op_mod[0x10];
 +
 +      u8         encap_id[0x20];
 +
 +      u8         reserved_at_60[0xa0];
 +};
 +
 +struct mlx5_ifc_alloc_encap_header_out_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         encap_id[0x20];
 +
 +      u8         reserved_at_60[0x20];
 +};
 +
 +struct mlx5_ifc_alloc_encap_header_in_bits {
 +      u8         opcode[0x10];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         op_mod[0x10];
 +
 +      u8         reserved_at_40[0xa0];
 +
 +      struct mlx5_ifc_encap_header_in_bits encap_header;
 +};
 +
 +struct mlx5_ifc_dealloc_encap_header_out_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
 +struct mlx5_ifc_dealloc_encap_header_in_bits {
 +      u8         opcode[0x10];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         reserved_20[0x10];
 +      u8         op_mod[0x10];
 +
 +      u8         encap_id[0x20];
 +
 +      u8         reserved_60[0x20];
 +};
 +
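
The ALLOC_ENCAP_HEADER/DEALLOC_ENCAP_HEADER layouts above are driven through the usual mlx5 ifc accessors. A rough sketch of how a caller could build the allocation command (a hypothetical wrapper, not code from this patch; the mlx5_cmd_exec() call is schematic):

static int example_encap_alloc(struct mlx5_core_dev *dev, u8 header_type,
			       const void *hdr, size_t size, u32 *encap_id)
{
	u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)] = {0};
	int inlen = MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size;
	void *encap, *data;
	u32 *in;
	int err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(alloc_encap_header_in, in, opcode,
		 MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
	encap = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
	MLX5_SET(encap_header_in, encap, header_type, header_type);
	MLX5_SET(encap_header_in, encap, encap_header_size, size);
	data = MLX5_ADDR_OF(encap_header_in, encap, encap_header);
	memcpy(data, hdr, size);

	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
	if (!err)
		*encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);

	kfree(in);
	return err;
}
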
  struct mlx5_ifc_query_dct_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
@@@ -4632,9 -4517,7 +4632,9 @@@ struct mlx5_ifc_modify_tis_out_bits 
  struct mlx5_ifc_modify_tis_bitmask_bits {
        u8         reserved_at_0[0x20];
  
 -      u8         reserved_at_20[0x1f];
 +      u8         reserved_at_20[0x1d];
 +      u8         lag_tx_port_affinity[0x1];
 +      u8         strict_lag_tx_port_affinity[0x1];
        u8         prio[0x1];
  };
  
@@@ -4769,11 -4652,6 +4769,11 @@@ struct mlx5_ifc_modify_rq_out_bits 
        u8         reserved_at_40[0x40];
  };
  
 +enum {
 +      MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
 +      MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3,
 +};
 +
  struct mlx5_ifc_modify_rq_in_bits {
        u8         opcode[0x10];
        u8         reserved_at_10[0x10];
@@@ -4843,7 -4721,7 +4843,7 @@@ struct mlx5_ifc_modify_nic_vport_field_
        u8         reserved_at_0[0x16];
        u8         node_guid[0x1];
        u8         port_guid[0x1];
 -      u8         reserved_at_18[0x1];
 +      u8         min_inline[0x1];
        u8         mtu[0x1];
        u8         change_event[0x1];
        u8         promisc[0x1];
@@@ -6221,9 -6099,7 +6221,9 @@@ struct mlx5_ifc_create_flow_table_in_bi
  
        u8         reserved_at_a0[0x20];
  
 -      u8         reserved_at_c0[0x4];
 +      u8         encap_en[0x1];
 +      u8         decap_en[0x1];
 +      u8         reserved_at_c2[0x2];
        u8         table_miss_mode[0x4];
        u8         level[0x8];
        u8         reserved_at_d0[0x8];
        u8         reserved_at_e0[0x8];
        u8         table_miss_id[0x18];
  
 -      u8         reserved_at_100[0x100];
 +      u8         reserved_at_100[0x8];
 +      u8         lag_master_next_table_id[0x18];
 +
 +      u8         reserved_at_120[0x80];
  };
  
  struct mlx5_ifc_create_flow_group_out_bits {
@@@ -6837,9 -6710,10 +6837,10 @@@ struct mlx5_ifc_pude_reg_bits 
  };
  
  struct mlx5_ifc_ptys_reg_bits {
-       u8         an_disable_cap[0x1];
+       u8         reserved_at_0[0x1];
        u8         an_disable_admin[0x1];
-       u8         reserved_at_2[0x6];
+       u8         an_disable_cap[0x1];
+       u8         reserved_at_3[0x5];
        u8         local_port[0x8];
        u8         reserved_at_10[0xd];
        u8         proto_mask[0x3];
@@@ -7689,8 -7563,7 +7690,8 @@@ struct mlx5_ifc_set_flow_table_root_in_
  };
  
  enum {
 -      MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1,
 +      MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID     = (1UL << 0),
 +      MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID = (1UL << 15),
  };
  
  struct mlx5_ifc_modify_flow_table_out_bits {
@@@ -7729,10 -7602,7 +7730,10 @@@ struct mlx5_ifc_modify_flow_table_in_bi
        u8         reserved_at_e0[0x8];
        u8         table_miss_id[0x18];
  
 -      u8         reserved_at_100[0x100];
 +      u8         reserved_at_100[0x8];
 +      u8         lag_master_next_table_id[0x18];
 +
 +      u8         reserved_at_120[0x80];
  };
  
  struct mlx5_ifc_ets_tcn_config_reg_bits {
@@@ -7840,134 -7710,4 +7841,134 @@@ struct mlx5_ifc_dcbx_param_bits 
        u8         error[0x8];
        u8         reserved_at_a0[0x160];
  };
 +
 +struct mlx5_ifc_lagc_bits {
 +      u8         reserved_at_0[0x1d];
 +      u8         lag_state[0x3];
 +
 +      u8         reserved_at_20[0x14];
 +      u8         tx_remap_affinity_2[0x4];
 +      u8         reserved_at_38[0x4];
 +      u8         tx_remap_affinity_1[0x4];
 +};
 +
 +struct mlx5_ifc_create_lag_out_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
 +struct mlx5_ifc_create_lag_in_bits {
 +      u8         opcode[0x10];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         op_mod[0x10];
 +
 +      struct mlx5_ifc_lagc_bits ctx;
 +};
 +
 +struct mlx5_ifc_modify_lag_out_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
 +struct mlx5_ifc_modify_lag_in_bits {
 +      u8         opcode[0x10];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         op_mod[0x10];
 +
 +      u8         reserved_at_40[0x20];
 +      u8         field_select[0x20];
 +
 +      struct mlx5_ifc_lagc_bits ctx;
 +};
 +
 +struct mlx5_ifc_query_lag_out_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         reserved_at_40[0x40];
 +
 +      struct mlx5_ifc_lagc_bits ctx;
 +};
 +
 +struct mlx5_ifc_query_lag_in_bits {
 +      u8         opcode[0x10];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         op_mod[0x10];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
 +struct mlx5_ifc_destroy_lag_out_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
 +struct mlx5_ifc_destroy_lag_in_bits {
 +      u8         opcode[0x10];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         op_mod[0x10];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
 +struct mlx5_ifc_create_vport_lag_out_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
 +struct mlx5_ifc_create_vport_lag_in_bits {
 +      u8         opcode[0x10];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         op_mod[0x10];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
 +struct mlx5_ifc_destroy_vport_lag_out_bits {
 +      u8         status[0x8];
 +      u8         reserved_at_8[0x18];
 +
 +      u8         syndrome[0x20];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
 +struct mlx5_ifc_destroy_vport_lag_in_bits {
 +      u8         opcode[0x10];
 +      u8         reserved_at_10[0x10];
 +
 +      u8         reserved_at_20[0x10];
 +      u8         op_mod[0x10];
 +
 +      u8         reserved_at_40[0x40];
 +};
 +
  #endif /* MLX5_IFC_H */
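
The hunks above add mlx5_ifc layouts for the new hardware LAG commands (create/modify/query/destroy LAG plus the vport-LAG variants), all wrapping the shared mlx5_ifc_lagc_bits context. As a hedged sketch only, assuming the usual MLX5_SET()/MLX5_ADDR_OF()/mlx5_cmd_exec() helpers and an MLX5_CMD_OP_CREATE_LAG opcode (neither the function name nor the opcode constant comes from this hunk), a CREATE_LAG command built on these layouts might look like:

/* Sketch under the assumptions named above; not code from this merge. */
static int sketch_cmd_create_lag(struct mlx5_core_dev *dev,
				 u8 remap_port1, u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	/* Opcode goes in the common command header ... */
	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
	/* ... and the port affinities in the lagc context defined above. */
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}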
@@@ -52,7 -52,6 +52,7 @@@
  #include <uapi/linux/netdevice.h>
  #include <uapi/linux/if_bonding.h>
  #include <uapi/linux/pkt_cls.h>
 +#include <linux/hashtable.h>
  
  struct netpoll_info;
  struct device;
@@@ -1031,7 -1030,7 +1031,7 @@@ struct netdev_xdp 
   *    Deletes the FDB entry from dev corresponding to addr.
   * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
   *                   struct net_device *dev, struct net_device *filter_dev,
 - *                   int idx)
 + *                   int *idx)
   *    Used to add FDB entries to dump requests. Implementers should add
   *    entries to skb and update idx with the number of entries.
   *
@@@ -1263,7 -1262,7 +1263,7 @@@ struct net_device_ops 
                                                struct netlink_callback *cb,
                                                struct net_device *dev,
                                                struct net_device *filter_dev,
 -                                              int idx);
 +                                              int *idx);
  
        int                     (*ndo_bridge_setlink)(struct net_device *dev,
                                                      struct nlmsghdr *nlh,
@@@ -1562,6 -1561,8 +1562,6 @@@ enum netdev_priv_flags 
   *
   *    @xps_maps:      XXX: need comments on this one
   *
 - *    @offload_fwd_mark:      Offload device fwding mark
 - *
   *    @watchdog_timeo:        Represents the timeout that is used by
   *                            the watchdog (see dev_watchdog())
   *    @watchdog_timer:        List of timers
@@@ -1799,9 -1800,6 +1799,9 @@@ struct net_device 
        unsigned int            num_tx_queues;
        unsigned int            real_num_tx_queues;
        struct Qdisc            *qdisc;
 +#ifdef CONFIG_NET_SCHED
 +      DECLARE_HASHTABLE       (qdisc_hash, 4);
 +#endif
        unsigned long           tx_queue_len;
        spinlock_t              tx_global_lock;
        int                     watchdog_timeo;
  #ifdef CONFIG_NET_CLS_ACT
        struct tcf_proto __rcu  *egress_cl_list;
  #endif
 -#ifdef CONFIG_NET_SWITCHDEV
 -      u32                     offload_fwd_mark;
 -#endif
  
        /* These may be needed for future network-power-down code. */
        struct timer_list       watchdog_timer;
@@@ -3266,6 -3267,7 +3266,7 @@@ static inline void napi_free_frags(stru
        napi->skb = NULL;
  }
  
+ bool netdev_is_rx_handler_busy(struct net_device *dev);
  int netdev_rx_handler_register(struct net_device *dev,
                               rx_handler_func_t *rx_handler,
                               void *rx_handler_data);
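
The netdevice.h hunk above changes ndo_fdb_dump() to take an int *idx and exports a new netdev_is_rx_handler_busy() helper next to the existing registration calls. A minimal sketch of how a stacking driver could use that helper, assuming RTNL is already held (the wrapper name and the -EBUSY return are illustrative, not from this merge):

/* Sketch: probe for an existing rx_handler before claiming the device. */
static int sketch_claim_lower_dev(struct net_device *lower,
				  rx_handler_func_t *handler, void *priv)
{
	ASSERT_RTNL();

	/* Another upper device (bridge, bond, ...) already owns the hook. */
	if (netdev_is_rx_handler_busy(lower))
		return -EBUSY;

	return netdev_rx_handler_register(lower, handler, priv);
}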
diff --combined net/bridge/br_input.c
@@@ -80,13 -80,10 +80,10 @@@ static void br_do_proxy_arp(struct sk_b
  
        BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
  
-       if (dev->flags & IFF_NOARP)
+       if ((dev->flags & IFF_NOARP) ||
+           !pskb_may_pull(skb, arp_hdr_len(dev)))
                return;
  
-       if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
-               dev->stats.tx_dropped++;
-               return;
-       }
        parp = arp_hdr(skb);
  
        if (parp->ar_pro != htons(ETH_P_IP) ||
  /* note: already called with rcu_read_lock */
  int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
  {
 -      bool local_rcv = false, mcast_hit = false, unicast = true;
        struct net_bridge_port *p = br_port_get_rcu(skb->dev);
        const unsigned char *dest = eth_hdr(skb)->h_dest;
 +      enum br_pkt_type pkt_type = BR_PKT_UNICAST;
        struct net_bridge_fdb_entry *dst = NULL;
        struct net_bridge_mdb_entry *mdst;
 +      bool local_rcv, mcast_hit = false;
        struct net_bridge *br;
        u16 vid = 0;
  
        if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
                goto out;
  
 +      nbp_switchdev_frame_mark(p, skb);
 +
        /* insert into forwarding database after filtering to avoid spoofing */
        br = p->br;
        if (p->flags & BR_LEARNING)
                br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
  
 -      if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
 -          br_multicast_rcv(br, p, skb, vid))
 -              goto drop;
 +      local_rcv = !!(br->dev->flags & IFF_PROMISC);
 +      if (is_multicast_ether_addr(dest)) {
 +              /* by definition the broadcast is also a multicast address */
 +              if (is_broadcast_ether_addr(dest)) {
 +                      pkt_type = BR_PKT_BROADCAST;
 +                      local_rcv = true;
 +              } else {
 +                      pkt_type = BR_PKT_MULTICAST;
 +                      if (br_multicast_rcv(br, p, skb, vid))
 +                              goto drop;
 +              }
 +      }
  
        if (p->state == BR_STATE_LEARNING)
                goto drop;
  
        BR_INPUT_SKB_CB(skb)->brdev = br->dev;
  
 -      local_rcv = !!(br->dev->flags & IFF_PROMISC);
 -
        if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
                br_do_proxy_arp(skb, br, vid, p);
  
 -      if (is_broadcast_ether_addr(dest)) {
 -              local_rcv = true;
 -              unicast = false;
 -      } else if (is_multicast_ether_addr(dest)) {
 +      switch (pkt_type) {
 +      case BR_PKT_MULTICAST:
                mdst = br_mdb_get(br, skb, vid);
                if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
                    br_multicast_querier_exists(br, eth_hdr(skb))) {
                        local_rcv = true;
                        br->dev->stats.multicast++;
                }
 -              unicast = false;
 -      } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) {
 -              /* Do not forward the packet since it's local. */
 -              return br_pass_frame_up(skb);
 +              break;
 +      case BR_PKT_UNICAST:
 +              dst = __br_fdb_get(br, dest, vid);
 +      default:
 +              break;
        }
  
        if (dst) {
 +              if (dst->is_local)
 +                      return br_pass_frame_up(skb);
 +
                dst->used = jiffies;
                br_forward(dst->dst, skb, local_rcv, false);
        } else {
                if (!mcast_hit)
 -                      br_flood(br, skb, unicast, local_rcv, false);
 +                      br_flood(br, skb, pkt_type, local_rcv, false);
                else
                        br_multicast_flood(mdst, skb, local_rcv, false);
        }
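
The bridge input path above now classifies each frame once into a pkt_type and hands that to br_flood() instead of separate unicast/broadcast booleans. The enum itself lives in br_private.h and is not part of this hunk; it presumably amounts to:

/* Assumed definition (net/bridge/br_private.h, not shown in this merge). */
enum br_pkt_type {
	BR_PKT_UNICAST,
	BR_PKT_MULTICAST,
	BR_PKT_BROADCAST
};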
diff --combined net/core/dev.c
@@@ -3355,6 -3355,16 +3355,6 @@@ static int __dev_queue_xmit(struct sk_b
        else
                skb_dst_force(skb);
  
 -#ifdef CONFIG_NET_SWITCHDEV
 -      /* Don't forward if offload device already forwarded */
 -      if (skb->offload_fwd_mark &&
 -          skb->offload_fwd_mark == dev->offload_fwd_mark) {
 -              consume_skb(skb);
 -              rc = NET_XMIT_SUCCESS;
 -              goto out;
 -      }
 -#endif
 -
        txq = netdev_pick_tx(dev, skb, accel_priv);
        q = rcu_dereference_bh(txq->qdisc);
  
@@@ -3904,7 -3914,8 +3904,7 @@@ static void net_tx_action(struct softir
        }
  }
  
 -#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
 -    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 +#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
  /* This hook is defined here for ATM LANE */
  int (*br_fdb_test_addr_hook)(struct net_device *dev,
                             unsigned char *addr) __read_mostly;
@@@ -3963,6 -3974,22 +3963,22 @@@ sch_handle_ingress(struct sk_buff *skb
        return skb;
  }
  
+ /**
+  *    netdev_is_rx_handler_busy - check if receive handler is registered
+  *    @dev: device to check
+  *
+  *    Check if a receive handler is already registered for a given device.
+  *    Return true if there is one.
+  *
+  *    The caller must hold the rtnl_mutex.
+  */
+ bool netdev_is_rx_handler_busy(struct net_device *dev)
+ {
+       ASSERT_RTNL();
+       return dev && rtnl_dereference(dev->rx_handler);
+ }
+ EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
  /**
   *    netdev_rx_handler_register - register receive handler
   *    @dev: device to register a handler for
@@@ -4281,53 -4308,32 +4297,53 @@@ int netif_receive_skb(struct sk_buff *s
  }
  EXPORT_SYMBOL(netif_receive_skb);
  
 -/* Network device is going away, flush any packets still pending
 - * Called with irqs disabled.
 - */
 -static void flush_backlog(void *arg)
 +DEFINE_PER_CPU(struct work_struct, flush_works);
 +
 +/* Network device is going away, flush any packets still pending */
 +static void flush_backlog(struct work_struct *work)
  {
 -      struct net_device *dev = arg;
 -      struct softnet_data *sd = this_cpu_ptr(&softnet_data);
        struct sk_buff *skb, *tmp;
 +      struct softnet_data *sd;
  
 +      local_bh_disable();
 +      sd = this_cpu_ptr(&softnet_data);
 +
 +      local_irq_disable();
        rps_lock(sd);
        skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
 -              if (skb->dev == dev) {
 +              if (skb->dev->reg_state == NETREG_UNREGISTERING) {
                        __skb_unlink(skb, &sd->input_pkt_queue);
                        kfree_skb(skb);
                        input_queue_head_incr(sd);
                }
        }
        rps_unlock(sd);
 +      local_irq_enable();
  
        skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
 -              if (skb->dev == dev) {
 +              if (skb->dev->reg_state == NETREG_UNREGISTERING) {
                        __skb_unlink(skb, &sd->process_queue);
                        kfree_skb(skb);
                        input_queue_head_incr(sd);
                }
        }
 +      local_bh_enable();
 +}
 +
 +static void flush_all_backlogs(void)
 +{
 +      unsigned int cpu;
 +
 +      get_online_cpus();
 +
 +      for_each_online_cpu(cpu)
 +              queue_work_on(cpu, system_highpri_wq,
 +                            per_cpu_ptr(&flush_works, cpu));
 +
 +      for_each_online_cpu(cpu)
 +              flush_work(per_cpu_ptr(&flush_works, cpu));
 +
 +      put_online_cpus();
  }
  
  static int napi_gro_complete(struct sk_buff *skb)
@@@ -4815,9 -4821,8 +4831,9 @@@ static bool sd_has_rps_ipi_waiting(stru
  
  static int process_backlog(struct napi_struct *napi, int quota)
  {
 -      int work = 0;
        struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 +      bool again = true;
 +      int work = 0;
  
        /* Check if we have pending ipi, its better to send them now,
         * not waiting net_rx_action() end.
        }
  
        napi->weight = weight_p;
 -      local_irq_disable();
 -      while (1) {
 +      while (again) {
                struct sk_buff *skb;
  
                while ((skb = __skb_dequeue(&sd->process_queue))) {
                        rcu_read_lock();
 -                      local_irq_enable();
                        __netif_receive_skb(skb);
                        rcu_read_unlock();
 -                      local_irq_disable();
                        input_queue_head_incr(sd);
 -                      if (++work >= quota) {
 -                              local_irq_enable();
 +                      if (++work >= quota)
                                return work;
 -                      }
 +
                }
  
 +              local_irq_disable();
                rps_lock(sd);
                if (skb_queue_empty(&sd->input_pkt_queue)) {
                        /*
                         * and we dont need an smp_mb() memory barrier.
                         */
                        napi->state = 0;
 -                      rps_unlock(sd);
 -
 -                      break;
 +                      again = false;
 +              } else {
 +                      skb_queue_splice_tail_init(&sd->input_pkt_queue,
 +                                                 &sd->process_queue);
                }
 -
 -              skb_queue_splice_tail_init(&sd->input_pkt_queue,
 -                                         &sd->process_queue);
                rps_unlock(sd);
 +              local_irq_enable();
        }
 -      local_irq_enable();
  
        return work;
  }
@@@ -6713,8 -6723,8 +6729,8 @@@ static void rollback_registered_many(st
                unlist_netdevice(dev);
  
                dev->reg_state = NETREG_UNREGISTERING;
 -              on_each_cpu(flush_backlog, dev, 1);
        }
 +      flush_all_backlogs();
  
        synchronize_net();
  
@@@ -7631,9 -7641,6 +7647,9 @@@ struct net_device *alloc_netdev_mqs(in
        INIT_LIST_HEAD(&dev->all_adj_list.lower);
        INIT_LIST_HEAD(&dev->ptype_all);
        INIT_LIST_HEAD(&dev->ptype_specific);
 +#ifdef CONFIG_NET_SCHED
 +      hash_init(dev->qdisc_hash);
 +#endif
        dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
        setup(dev);
  
@@@ -8279,11 -8286,8 +8295,11 @@@ static int __init net_dev_init(void
         */
  
        for_each_possible_cpu(i) {
 +              struct work_struct *flush = per_cpu_ptr(&flush_works, i);
                struct softnet_data *sd = &per_cpu(softnet_data, i);
  
 +              INIT_WORK(flush, flush_backlog);
 +
                skb_queue_head_init(&sd->input_pkt_queue);
                skb_queue_head_init(&sd->process_queue);
                INIT_LIST_HEAD(&sd->poll_list);
@@@ -6,8 -6,6 +6,8 @@@
  #include <linux/if_vlan.h>
  #include <net/ip.h>
  #include <net/ipv6.h>
 +#include <net/gre.h>
 +#include <net/pptp.h>
  #include <linux/igmp.h>
  #include <linux/icmp.h>
  #include <linux/sctp.h>
@@@ -118,16 -116,13 +118,16 @@@ bool __skb_flow_dissect(const struct sk
        struct flow_dissector_key_addrs *key_addrs;
        struct flow_dissector_key_ports *key_ports;
        struct flow_dissector_key_tags *key_tags;
 +      struct flow_dissector_key_vlan *key_vlan;
        struct flow_dissector_key_keyid *key_keyid;
 +      bool skip_vlan = false;
        u8 ip_proto = 0;
        bool ret = false;
  
        if (!data) {
                data = skb->data;
 -              proto = skb->protocol;
 +              proto = skb_vlan_tag_present(skb) ?
 +                       skb->vlan_proto : skb->protocol;
                nhoff = skb_network_offset(skb);
                hlen = skb_headlen(skb);
        }
@@@ -246,45 -241,23 +246,45 @@@ ipv6
        case htons(ETH_P_8021AD):
        case htons(ETH_P_8021Q): {
                const struct vlan_hdr *vlan;
 -              struct vlan_hdr _vlan;
  
 -              vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
 -              if (!vlan)
 -                      goto out_bad;
 +              if (skb_vlan_tag_present(skb))
 +                      proto = skb->protocol;
 +
 +              if (!skb_vlan_tag_present(skb) ||
 +                  proto == cpu_to_be16(ETH_P_8021Q) ||
 +                  proto == cpu_to_be16(ETH_P_8021AD)) {
 +                      struct vlan_hdr _vlan;
  
 +                      vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
 +                                                  data, hlen, &_vlan);
 +                      if (!vlan)
 +                              goto out_bad;
 +                      proto = vlan->h_vlan_encapsulated_proto;
 +                      nhoff += sizeof(*vlan);
 +                      if (skip_vlan)
 +                              goto again;
 +              }
 +
 +              skip_vlan = true;
                if (dissector_uses_key(flow_dissector,
 -                                     FLOW_DISSECTOR_KEY_VLANID)) {
 -                      key_tags = skb_flow_dissector_target(flow_dissector,
 -                                                           FLOW_DISSECTOR_KEY_VLANID,
 +                                     FLOW_DISSECTOR_KEY_VLAN)) {
 +                      key_vlan = skb_flow_dissector_target(flow_dissector,
 +                                                           FLOW_DISSECTOR_KEY_VLAN,
                                                             target_container);
  
 -                      key_tags->vlan_id = skb_vlan_tag_get_id(skb);
 +                      if (skb_vlan_tag_present(skb)) {
 +                              key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
 +                              key_vlan->vlan_priority =
 +                                      (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
 +                      } else {
 +                              key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
 +                                      VLAN_VID_MASK;
 +                              key_vlan->vlan_priority =
 +                                      (ntohs(vlan->h_vlan_TCI) &
 +                                       VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 +                      }
                }
  
 -              proto = vlan->h_vlan_encapsulated_proto;
 -              nhoff += sizeof(*vlan);
                goto again;
        }
        case htons(ETH_P_PPP_SES): {
@@@ -365,42 -338,32 +365,42 @@@ mpls
  ip_proto_again:
        switch (ip_proto) {
        case IPPROTO_GRE: {
 -              struct gre_hdr {
 -                      __be16 flags;
 -                      __be16 proto;
 -              } *hdr, _hdr;
 +              struct gre_base_hdr *hdr, _hdr;
 +              u16 gre_ver;
 +              int offset = 0;
  
                hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
                if (!hdr)
                        goto out_bad;
 -              /*
 -               * Only look inside GRE if version zero and no
 -               * routing
 -               */
 -              if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
 +
 +              /* Only look inside GRE without routing */
 +              if (hdr->flags & GRE_ROUTING)
                        break;
  
 -              proto = hdr->proto;
 -              nhoff += 4;
 +              /* Only look inside GRE for version 0 and 1 */
 +              gre_ver = ntohs(hdr->flags & GRE_VERSION);
 +              if (gre_ver > 1)
 +                      break;
 +
 +              proto = hdr->protocol;
 +              if (gre_ver) {
 +                      /* Version 1 must be PPTP, and check the flags */
 +                      if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
 +                              break;
 +              }
 +
 +              offset += sizeof(struct gre_base_hdr);
 +
                if (hdr->flags & GRE_CSUM)
 -                      nhoff += 4;
 +                      offset += sizeof(((struct gre_full_hdr *)0)->csum) +
 +                                sizeof(((struct gre_full_hdr *)0)->reserved1);
 +
                if (hdr->flags & GRE_KEY) {
                        const __be32 *keyid;
                        __be32 _keyid;
  
 -                      keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
 +                      keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
                                                     data, hlen, &_keyid);
 -
                        if (!keyid)
                                goto out_bad;
  
                                key_keyid = skb_flow_dissector_target(flow_dissector,
                                                                      FLOW_DISSECTOR_KEY_GRE_KEYID,
                                                                      target_container);
 -                              key_keyid->keyid = *keyid;
 +                              if (gre_ver == 0)
 +                                      key_keyid->keyid = *keyid;
 +                              else
 +                                      key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
                        }
 -                      nhoff += 4;
 +                      offset += sizeof(((struct gre_full_hdr *)0)->key);
                }
 +
                if (hdr->flags & GRE_SEQ)
 -                      nhoff += 4;
 -              if (proto == htons(ETH_P_TEB)) {
 -                      const struct ethhdr *eth;
 -                      struct ethhdr _eth;
 -
 -                      eth = __skb_header_pointer(skb, nhoff,
 -                                                 sizeof(_eth),
 -                                                 data, hlen, &_eth);
 -                      if (!eth)
 +                      offset += sizeof(((struct pptp_gre_header *)0)->seq);
 +
 +              if (gre_ver == 0) {
 +                      if (proto == htons(ETH_P_TEB)) {
 +                              const struct ethhdr *eth;
 +                              struct ethhdr _eth;
 +
 +                              eth = __skb_header_pointer(skb, nhoff + offset,
 +                                                         sizeof(_eth),
 +                                                         data, hlen, &_eth);
 +                              if (!eth)
 +                                      goto out_bad;
 +                              proto = eth->h_proto;
 +                              offset += sizeof(*eth);
 +
 +                              /* Cap headers that we access via pointers at the
 +                               * end of the Ethernet header as our maximum alignment
 +                               * at that point is only 2 bytes.
 +                               */
 +                              if (NET_IP_ALIGN)
 +                                      hlen = (nhoff + offset);
 +                      }
 +              } else { /* version 1, must be PPTP */
 +                      u8 _ppp_hdr[PPP_HDRLEN];
 +                      u8 *ppp_hdr;
 +
 +                      if (hdr->flags & GRE_ACK)
 +                              offset += sizeof(((struct pptp_gre_header *)0)->ack);
 +
 +                      ppp_hdr = skb_header_pointer(skb, nhoff + offset,
 +                                                   sizeof(_ppp_hdr), _ppp_hdr);
 +                      if (!ppp_hdr)
                                goto out_bad;
 -                      proto = eth->h_proto;
 -                      nhoff += sizeof(*eth);
 -
 -                      /* Cap headers that we access via pointers at the
 -                       * end of the Ethernet header as our maximum alignment
 -                       * at that point is only 2 bytes.
 -                       */
 -                      if (NET_IP_ALIGN)
 -                              hlen = nhoff;
 +
 +                      switch (PPP_PROTOCOL(ppp_hdr)) {
 +                      case PPP_IP:
 +                              proto = htons(ETH_P_IP);
 +                              break;
 +                      case PPP_IPV6:
 +                              proto = htons(ETH_P_IPV6);
 +                              break;
 +                      default:
 +                              /* Could probably catch some more like MPLS */
 +                              break;
 +                      }
 +
 +                      offset += PPP_HDRLEN;
                }
  
 +              nhoff += offset;
                key_control->flags |= FLOW_DIS_ENCAPSULATION;
                if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
                        goto out_good;
@@@ -750,11 -680,13 +750,13 @@@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmet
  void __skb_get_hash(struct sk_buff *skb)
  {
        struct flow_keys keys;
+       u32 hash;
  
        __flow_hash_secret_init();
  
-       __skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd),
-                         flow_keys_have_l4(&keys));
+       hash = ___skb_get_hash(skb, &keys, hashrnd);
+       __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
  }
  EXPORT_SYMBOL(__skb_get_hash);
  
@@@ -942,8 -874,8 +944,8 @@@ static const struct flow_dissector_key 
                .offset = offsetof(struct flow_keys, ports),
        },
        {
 -              .key_id = FLOW_DISSECTOR_KEY_VLANID,
 -              .offset = offsetof(struct flow_keys, tags),
 +              .key_id = FLOW_DISSECTOR_KEY_VLAN,
 +              .offset = offsetof(struct flow_keys, vlan),
        },
        {
                .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
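
The flow-dissector hunks above replace the old FLOW_DISSECTOR_KEY_VLANID tag with a fuller VLAN key and switch GRE parsing to the shared gre_base_hdr/pptp helpers. The two structures the new code leans on are defined elsewhere; they are reproduced here only as a hedged aid (shapes assumed, not taken from this merge):

/* Assumed: include/net/gre.h */
struct gre_base_hdr {
	__be16 flags;
	__be16 protocol;
};

/* Assumed: include/net/flow_dissector.h */
struct flow_dissector_key_vlan {
	u16	vlan_id:12,
		vlan_priority:3;
	u16	padding;
};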
diff --combined net/ipv4/fib_frontend.c
@@@ -93,6 -93,9 +93,6 @@@ struct fib_table *fib_new_table(struct 
                return NULL;
  
        switch (id) {
 -      case RT_TABLE_LOCAL:
 -              rcu_assign_pointer(net->ipv4.fib_local, tb);
 -              break;
        case RT_TABLE_MAIN:
                rcu_assign_pointer(net->ipv4.fib_main, tb);
                break;
@@@ -134,6 -137,9 +134,6 @@@ static void fib_replace_table(struct ne
  {
  #ifdef CONFIG_IP_MULTIPLE_TABLES
        switch (new->tb_id) {
 -      case RT_TABLE_LOCAL:
 -              rcu_assign_pointer(net->ipv4.fib_local, new);
 -              break;
        case RT_TABLE_MAIN:
                rcu_assign_pointer(net->ipv4.fib_main, new);
                break;
@@@ -503,6 -509,7 +503,7 @@@ static int rtentry_to_fib_config(struc
                if (!dev)
                        return -ENODEV;
                cfg->fc_oif = dev->ifindex;
+               cfg->fc_table = l3mdev_fib_table(dev);
                if (colon) {
                        struct in_ifaddr *ifa;
                        struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@@ -1021,7 -1028,7 +1022,7 @@@ no_promotions
                         * First of all, we scan fib_info list searching
                         * for stray nexthop entries, then ignite fib_flush.
                         */
-                       if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
+                       if (fib_sync_down_addr(dev, ifa->ifa_local))
                                fib_flush(dev_net(dev));
                }
        }
@@@ -1243,6 -1250,7 +1244,6 @@@ static void ip_fib_net_exit(struct net 
  
        rtnl_lock();
  #ifdef CONFIG_IP_MULTIPLE_TABLES
 -      RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
        RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
        RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
  #endif
diff --combined net/ipv4/fib_semantics.c
@@@ -1057,6 -1057,7 +1057,7 @@@ struct fib_info *fib_create_info(struc
        fi->fib_priority = cfg->fc_priority;
        fi->fib_prefsrc = cfg->fc_prefsrc;
        fi->fib_type = cfg->fc_type;
+       fi->fib_tb_id = cfg->fc_table;
  
        fi->fib_nhs = nhs;
        change_nexthops(fi) {
@@@ -1337,18 -1338,21 +1338,21 @@@ nla_put_failure
   *   referring to it.
   * - device went down -> we must shutdown all nexthops going via it.
   */
- int fib_sync_down_addr(struct net *net, __be32 local)
+ int fib_sync_down_addr(struct net_device *dev, __be32 local)
  {
        int ret = 0;
        unsigned int hash = fib_laddr_hashfn(local);
        struct hlist_head *head = &fib_info_laddrhash[hash];
+       struct net *net = dev_net(dev);
+       int tb_id = l3mdev_fib_table(dev);
        struct fib_info *fi;
  
        if (!fib_info_laddrhash || local == 0)
                return 0;
  
        hlist_for_each_entry(fi, head, fib_lhash) {
-               if (!net_eq(fi->fib_net, net))
+               if (!net_eq(fi->fib_net, net) ||
+                   fi->fib_tb_id != tb_id)
                        continue;
                if (fi->fib_prefsrc == local) {
                        fi->fib_flags |= RTNH_F_DEAD;
@@@ -1576,8 -1580,7 +1580,8 @@@ static bool fib_good_nh(const struct fi
  
                rcu_read_lock_bh();
  
 -              n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw);
 +              n = __ipv4_neigh_lookup_noref(nh->nh_dev,
 +                                            (__force u32)nh->nh_gw);
                if (n)
                        state = n->nud_state;
  
diff --combined net/ipv4/xfrm4_policy.c
@@@ -29,7 -29,7 +29,7 @@@ static struct dst_entry *__xfrm4_dst_lo
        memset(fl4, 0, sizeof(*fl4));
        fl4->daddr = daddr->a4;
        fl4->flowi4_tos = tos;
-       fl4->flowi4_oif = oif;
+       fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
        if (saddr)
                fl4->saddr = saddr->a4;
  
@@@ -112,7 -112,7 +112,7 @@@ _decode_session4(struct sk_buff *skb, s
        int oif = 0;
  
        if (skb_dst(skb))
 -              oif = l3mdev_fib_oif(skb_dst(skb)->dev);
 +              oif = skb_dst(skb)->dev->ifindex;
  
        memset(fl4, 0, sizeof(struct flowi4));
        fl4->flowi4_mark = skb->mark;
diff --combined net/ipv6/ip6_tunnel.c
@@@ -64,8 -64,8 +64,8 @@@ MODULE_LICENSE("GPL")
  MODULE_ALIAS_RTNL_LINK("ip6tnl");
  MODULE_ALIAS_NETDEV("ip6tnl0");
  
 -#define HASH_SIZE_SHIFT  5
 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
 +#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
 +#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
  
  static bool log_ecn_error = true;
  module_param(log_ecn_error, bool, 0644);
@@@ -75,7 -75,7 +75,7 @@@ static u32 HASH(const struct in6_addr *
  {
        u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
  
 -      return hash_32(hash, HASH_SIZE_SHIFT);
 +      return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
  }
  
  static int ip6_tnl_dev_init(struct net_device *dev);
@@@ -87,7 -87,7 +87,7 @@@ struct ip6_tnl_net 
        /* the IPv6 tunnel fallback device */
        struct net_device *fb_tnl_dev;
        /* lists for storing tunnels in use */
 -      struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
 +      struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
        struct ip6_tnl __rcu *tnls_wc[1];
        struct ip6_tnl __rcu **tnls[2];
  };
@@@ -1174,6 -1174,7 +1174,7 @@@ ip4ip6_tnl_xmit(struct sk_buff *skb, st
                encap_limit = t->parms.encap_limit;
  
        memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+       fl6.flowi6_proto = IPPROTO_IPIP;
  
        dsfield = ipv4_get_dsfield(iph);
  
@@@ -1233,6 -1234,7 +1234,7 @@@ ip6ip6_tnl_xmit(struct sk_buff *skb, st
                encap_limit = t->parms.encap_limit;
  
        memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+       fl6.flowi6_proto = IPPROTO_IPV6;
  
        dsfield = ipv6_get_dsfield(ipv6h);
        if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
@@@ -2031,7 -2033,7 +2033,7 @@@ static void __net_exit ip6_tnl_destroy_
                if (dev->rtnl_link_ops == &ip6_link_ops)
                        unregister_netdevice_queue(dev, &list);
  
 -      for (h = 0; h < HASH_SIZE; h++) {
 +      for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
                t = rtnl_dereference(ip6n->tnls_r_l[h]);
                while (t) {
                        /* If dev is in the same netns, it has already
diff --combined net/ipv6/xfrm6_policy.c
@@@ -36,7 -36,7 +36,7 @@@ static struct dst_entry *xfrm6_dst_look
        int err;
  
        memset(&fl6, 0, sizeof(fl6));
-       fl6.flowi6_oif = oif;
+       fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
        fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
        memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
        if (saddr)
@@@ -134,7 -134,7 +134,7 @@@ _decode_session6(struct sk_buff *skb, s
        nexthdr = nh[nhoff];
  
        if (skb_dst(skb))
 -              oif = l3mdev_fib_oif(skb_dst(skb)->dev);
 +              oif = skb_dst(skb)->dev->ifindex;
  
        memset(fl6, 0, sizeof(struct flowi6));
        fl6->flowi6_mark = skb->mark;
diff --combined net/kcm/kcmsock.c
@@@ -1,13 -1,3 +1,13 @@@
 +/*
 + * Kernel Connection Multiplexor
 + *
 + * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
 + *
 + * This program is free software; you can redistribute it and/or modify
 + * it under the terms of the GNU General Public License version 2
 + * as published by the Free Software Foundation.
 + */
 +
  #include <linux/bpf.h>
  #include <linux/errno.h>
  #include <linux/errqueue.h>
  #include <linux/socket.h>
  #include <linux/uaccess.h>
  #include <linux/workqueue.h>
+ #include <linux/syscalls.h>
  #include <net/kcm.h>
  #include <net/netns/generic.h>
  #include <net/sock.h>
 -#include <net/tcp.h>
  #include <uapi/linux/kcm.h>
  
  unsigned int kcm_net_id;
@@@ -44,12 -36,38 +45,12 @@@ static inline struct kcm_tx_msg *kcm_tx
        return (struct kcm_tx_msg *)skb->cb;
  }
  
 -static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb)
 -{
 -      return (struct kcm_rx_msg *)((void *)skb->cb +
 -                                   offsetof(struct qdisc_skb_cb, data));
 -}
 -
  static void report_csk_error(struct sock *csk, int err)
  {
        csk->sk_err = EPIPE;
        csk->sk_error_report(csk);
  }
  
 -/* Callback lock held */
 -static void kcm_abort_rx_psock(struct kcm_psock *psock, int err,
 -                             struct sk_buff *skb)
 -{
 -      struct sock *csk = psock->sk;
 -
 -      /* Unrecoverable error in receive */
 -
 -      del_timer(&psock->rx_msg_timer);
 -
 -      if (psock->rx_stopped)
 -              return;
 -
 -      psock->rx_stopped = 1;
 -      KCM_STATS_INCR(psock->stats.rx_aborts);
 -
 -      /* Report an error on the lower socket */
 -      report_csk_error(csk, err);
 -}
 -
  static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
                               bool wakeup_kcm)
  {
  static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
                                    struct kcm_psock *psock)
  {
 -      KCM_STATS_ADD(mux->stats.rx_bytes,
 -                    psock->stats.rx_bytes - psock->saved_rx_bytes);
 +      STRP_STATS_ADD(mux->stats.rx_bytes,
 +                     psock->strp.stats.rx_bytes -
 +                     psock->saved_rx_bytes);
        mux->stats.rx_msgs +=
 -              psock->stats.rx_msgs - psock->saved_rx_msgs;
 -      psock->saved_rx_msgs = psock->stats.rx_msgs;
 -      psock->saved_rx_bytes = psock->stats.rx_bytes;
 +              psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
 +      psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
 +      psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
  }
  
  static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@@ -151,11 -168,11 +152,11 @@@ static void kcm_rcv_ready(struct kcm_so
                 */
                list_del(&psock->psock_ready_list);
                psock->ready_rx_msg = NULL;
 -
                /* Commit clearing of ready_rx_msg for queuing work */
                smp_mb();
  
 -              queue_work(kcm_wq, &psock->rx_work);
 +              strp_unpause(&psock->strp);
 +              strp_check_rcv(&psock->strp);
        }
  
        /* Buffer limit is okay now, add to ready list */
@@@ -269,7 -286,6 +270,7 @@@ static struct kcm_sock *reserve_rx_kcm(
  
        if (list_empty(&mux->kcm_rx_waiters)) {
                psock->ready_rx_msg = head;
 +              strp_pause(&psock->strp);
                list_add_tail(&psock->psock_ready_list,
                              &mux->psocks_ready);
                spin_unlock_bh(&mux->rx_lock);
@@@ -338,60 -354,346 +339,60 @@@ static void unreserve_rx_kcm(struct kcm
        spin_unlock_bh(&mux->rx_lock);
  }
  
 -static void kcm_start_rx_timer(struct kcm_psock *psock)
 -{
 -      if (psock->sk->sk_rcvtimeo)
 -              mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
 -}
 -
 -/* Macro to invoke filter function. */
 -#define KCM_RUN_FILTER(prog, ctx) \
 -      (*prog->bpf_func)(ctx, prog->insnsi)
 -
 -/* Lower socket lock held */
 -static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 -                      unsigned int orig_offset, size_t orig_len)
 -{
 -      struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data;
 -      struct kcm_rx_msg *rxm;
 -      struct kcm_sock *kcm;
 -      struct sk_buff *head, *skb;
 -      size_t eaten = 0, cand_len;
 -      ssize_t extra;
 -      int err;
 -      bool cloned_orig = false;
 -
 -      if (psock->ready_rx_msg)
 -              return 0;
 -
 -      head = psock->rx_skb_head;
 -      if (head) {
 -              /* Message already in progress */
 -
 -              rxm = kcm_rx_msg(head);
 -              if (unlikely(rxm->early_eaten)) {
 -                      /* Already some number of bytes on the receive sock
 -                       * data saved in rx_skb_head, just indicate they
 -                       * are consumed.
 -                       */
 -                      eaten = orig_len <= rxm->early_eaten ?
 -                              orig_len : rxm->early_eaten;
 -                      rxm->early_eaten -= eaten;
 -
 -                      return eaten;
 -              }
 -
 -              if (unlikely(orig_offset)) {
 -                      /* Getting data with a non-zero offset when a message is
 -                       * in progress is not expected. If it does happen, we
 -                       * need to clone and pull since we can't deal with
 -                       * offsets in the skbs for a message expect in the head.
 -                       */
 -                      orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
 -                      if (!orig_skb) {
 -                              KCM_STATS_INCR(psock->stats.rx_mem_fail);
 -                              desc->error = -ENOMEM;
 -                              return 0;
 -                      }
 -                      if (!pskb_pull(orig_skb, orig_offset)) {
 -                              KCM_STATS_INCR(psock->stats.rx_mem_fail);
 -                              kfree_skb(orig_skb);
 -                              desc->error = -ENOMEM;
 -                              return 0;
 -                      }
 -                      cloned_orig = true;
 -                      orig_offset = 0;
 -              }
 -
 -              if (!psock->rx_skb_nextp) {
 -                      /* We are going to append to the frags_list of head.
 -                       * Need to unshare the frag_list.
 -                       */
 -                      err = skb_unclone(head, GFP_ATOMIC);
 -                      if (err) {
 -                              KCM_STATS_INCR(psock->stats.rx_mem_fail);
 -                              desc->error = err;
 -                              return 0;
 -                      }
 -
 -                      if (unlikely(skb_shinfo(head)->frag_list)) {
 -                              /* We can't append to an sk_buff that already
 -                               * has a frag_list. We create a new head, point
 -                               * the frag_list of that to the old head, and
 -                               * then are able to use the old head->next for
 -                               * appending to the message.
 -                               */
 -                              if (WARN_ON(head->next)) {
 -                                      desc->error = -EINVAL;
 -                                      return 0;
 -                              }
 -
 -                              skb = alloc_skb(0, GFP_ATOMIC);
 -                              if (!skb) {
 -                                      KCM_STATS_INCR(psock->stats.rx_mem_fail);
 -                                      desc->error = -ENOMEM;
 -                                      return 0;
 -                              }
 -                              skb->len = head->len;
 -                              skb->data_len = head->len;
 -                              skb->truesize = head->truesize;
 -                              *kcm_rx_msg(skb) = *kcm_rx_msg(head);
 -                              psock->rx_skb_nextp = &head->next;
 -                              skb_shinfo(skb)->frag_list = head;
 -                              psock->rx_skb_head = skb;
 -                              head = skb;
 -                      } else {
 -                              psock->rx_skb_nextp =
 -                                  &skb_shinfo(head)->frag_list;
 -                      }
 -              }
 -      }
 -
 -      while (eaten < orig_len) {
 -              /* Always clone since we will consume something */
 -              skb = skb_clone(orig_skb, GFP_ATOMIC);
 -              if (!skb) {
 -                      KCM_STATS_INCR(psock->stats.rx_mem_fail);
 -                      desc->error = -ENOMEM;
 -                      break;
 -              }
 -
 -              cand_len = orig_len - eaten;
 -
 -              head = psock->rx_skb_head;
 -              if (!head) {
 -                      head = skb;
 -                      psock->rx_skb_head = head;
 -                      /* Will set rx_skb_nextp on next packet if needed */
 -                      psock->rx_skb_nextp = NULL;
 -                      rxm = kcm_rx_msg(head);
 -                      memset(rxm, 0, sizeof(*rxm));
 -                      rxm->offset = orig_offset + eaten;
 -              } else {
 -                      /* Unclone since we may be appending to an skb that we
 -                       * already share a frag_list with.
 -                       */
 -                      err = skb_unclone(skb, GFP_ATOMIC);
 -                      if (err) {
 -                              KCM_STATS_INCR(psock->stats.rx_mem_fail);
 -                              desc->error = err;
 -                              break;
 -                      }
 -
 -                      rxm = kcm_rx_msg(head);
 -                      *psock->rx_skb_nextp = skb;
 -                      psock->rx_skb_nextp = &skb->next;
 -                      head->data_len += skb->len;
 -                      head->len += skb->len;
 -                      head->truesize += skb->truesize;
 -              }
 -
 -              if (!rxm->full_len) {
 -                      ssize_t len;
 -
 -                      len = KCM_RUN_FILTER(psock->bpf_prog, head);
 -
 -                      if (!len) {
 -                              /* Need more header to determine length */
 -                              if (!rxm->accum_len) {
 -                                      /* Start RX timer for new message */
 -                                      kcm_start_rx_timer(psock);
 -                              }
 -                              rxm->accum_len += cand_len;
 -                              eaten += cand_len;
 -                              KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
 -                              WARN_ON(eaten != orig_len);
 -                              break;
 -                      } else if (len > psock->sk->sk_rcvbuf) {
 -                              /* Message length exceeds maximum allowed */
 -                              KCM_STATS_INCR(psock->stats.rx_msg_too_big);
 -                              desc->error = -EMSGSIZE;
 -                              psock->rx_skb_head = NULL;
 -                              kcm_abort_rx_psock(psock, EMSGSIZE, head);
 -                              break;
 -                      } else if (len <= (ssize_t)head->len -
 -                                        skb->len - rxm->offset) {
 -                              /* Length must be into new skb (and also
 -                               * greater than zero)
 -                               */
 -                              KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
 -                              desc->error = -EPROTO;
 -                              psock->rx_skb_head = NULL;
 -                              kcm_abort_rx_psock(psock, EPROTO, head);
 -                              break;
 -                      }
 -
 -                      rxm->full_len = len;
 -              }
 -
 -              extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len;
 -
 -              if (extra < 0) {
 -                      /* Message not complete yet. */
 -                      if (rxm->full_len - rxm->accum_len >
 -                          tcp_inq(psock->sk)) {
 -                              /* Don't have the whole messages in the socket
 -                               * buffer. Set psock->rx_need_bytes to wait for
 -                               * the rest of the message. Also, set "early
 -                               * eaten" since we've already buffered the skb
 -                               * but don't consume yet per tcp_read_sock.
 -                               */
 -
 -                              if (!rxm->accum_len) {
 -                                      /* Start RX timer for new message */
 -                                      kcm_start_rx_timer(psock);
 -                              }
 -
 -                              psock->rx_need_bytes = rxm->full_len -
 -                                                     rxm->accum_len;
 -                              rxm->accum_len += cand_len;
 -                              rxm->early_eaten = cand_len;
 -                              KCM_STATS_ADD(psock->stats.rx_bytes, cand_len);
 -                              desc->count = 0; /* Stop reading socket */
 -                              break;
 -                      }
 -                      rxm->accum_len += cand_len;
 -                      eaten += cand_len;
 -                      WARN_ON(eaten != orig_len);
 -                      break;
 -              }
 -
 -              /* Positive extra indicates ore bytes than needed for the
 -               * message
 -               */
 -
 -              WARN_ON(extra > cand_len);
 -
 -              eaten += (cand_len - extra);
 -
 -              /* Hurray, we have a new message! */
 -              del_timer(&psock->rx_msg_timer);
 -              psock->rx_skb_head = NULL;
 -              KCM_STATS_INCR(psock->stats.rx_msgs);
 -
 -try_queue:
 -              kcm = reserve_rx_kcm(psock, head);
 -              if (!kcm) {
 -                      /* Unable to reserve a KCM, message is held in psock. */
 -                      break;
 -              }
 -
 -              if (kcm_queue_rcv_skb(&kcm->sk, head)) {
 -                      /* Should mean socket buffer full */
 -                      unreserve_rx_kcm(psock, false);
 -                      goto try_queue;
 -              }
 -      }
 -
 -      if (cloned_orig)
 -              kfree_skb(orig_skb);
 -
 -      KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
 -
 -      return eaten;
 -}
 -
 -/* Called with lock held on lower socket */
 -static int psock_tcp_read_sock(struct kcm_psock *psock)
 -{
 -      read_descriptor_t desc;
 -
 -      desc.arg.data = psock;
 -      desc.error = 0;
 -      desc.count = 1; /* give more than one skb per call */
 -
 -      /* sk should be locked here, so okay to do tcp_read_sock */
 -      tcp_read_sock(psock->sk, &desc, kcm_tcp_recv);
 -
 -      unreserve_rx_kcm(psock, true);
 -
 -      return desc.error;
 -}
 -
  /* Lower sock lock held */
 -static void psock_tcp_data_ready(struct sock *sk)
 +static void psock_data_ready(struct sock *sk)
  {
        struct kcm_psock *psock;
  
        read_lock_bh(&sk->sk_callback_lock);
  
        psock = (struct kcm_psock *)sk->sk_user_data;
 -      if (unlikely(!psock || psock->rx_stopped))
 -              goto out;
 +      if (likely(psock))
 +              strp_data_ready(&psock->strp);
  
 -      if (psock->ready_rx_msg)
 -              goto out;
 -
 -      if (psock->rx_need_bytes) {
 -              if (tcp_inq(sk) >= psock->rx_need_bytes)
 -                      psock->rx_need_bytes = 0;
 -              else
 -                      goto out;
 -      }
 -
 -      if (psock_tcp_read_sock(psock) == -ENOMEM)
 -              queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
 -
 -out:
        read_unlock_bh(&sk->sk_callback_lock);
  }
  
 -static void do_psock_rx_work(struct kcm_psock *psock)
 +/* Called with lower sock held */
 +static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
  {
 -      read_descriptor_t rd_desc;
 -      struct sock *csk = psock->sk;
 -
 -      /* We need the read lock to synchronize with psock_tcp_data_ready. We
 -       * need the socket lock for calling tcp_read_sock.
 -       */
 -      lock_sock(csk);
 -      read_lock_bh(&csk->sk_callback_lock);
 -
 -      if (unlikely(csk->sk_user_data != psock))
 -              goto out;
 -
 -      if (unlikely(psock->rx_stopped))
 -              goto out;
 -
 -      if (psock->ready_rx_msg)
 -              goto out;
 -
 -      rd_desc.arg.data = psock;
 +      struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
 +      struct kcm_sock *kcm;
  
 -      if (psock_tcp_read_sock(psock) == -ENOMEM)
 -              queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
 +try_queue:
 +      kcm = reserve_rx_kcm(psock, skb);
 +      if (!kcm) {
 +               /* Unable to reserve a KCM, message is held in psock and strp
 +                * is paused.
 +                */
 +              return;
 +      }
  
 -out:
 -      read_unlock_bh(&csk->sk_callback_lock);
 -      release_sock(csk);
 +      if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
 +              /* Should mean socket buffer full */
 +              unreserve_rx_kcm(psock, false);
 +              goto try_queue;
 +      }
  }
  
 -static void psock_rx_work(struct work_struct *w)
 +static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
  {
 -      do_psock_rx_work(container_of(w, struct kcm_psock, rx_work));
 +      struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
 +      struct bpf_prog *prog = psock->bpf_prog;
 +
 +      return (*prog->bpf_func)(skb, prog->insnsi);
  }
  
 -static void psock_rx_delayed_work(struct work_struct *w)
 +static int kcm_read_sock_done(struct strparser *strp, int err)
  {
 -      do_psock_rx_work(container_of(w, struct kcm_psock,
 -                                    rx_delayed_work.work));
 +      struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
 +
 +      unreserve_rx_kcm(psock, true);
 +
 +      return err;
  }
  
 -static void psock_tcp_state_change(struct sock *sk)
 +static void psock_state_change(struct sock *sk)
  {
        /* TCP only does a POLLIN for a half close. Do a POLLHUP here
         * since application will normally not poll with POLLIN
        report_csk_error(sk, EPIPE);
  }
  
 -static void psock_tcp_write_space(struct sock *sk)
 +static void psock_write_space(struct sock *sk)
  {
        struct kcm_psock *psock;
        struct kcm_mux *mux;
        psock = (struct kcm_psock *)sk->sk_user_data;
        if (unlikely(!psock))
                goto out;
 -
        mux = psock->mux;
  
        spin_lock_bh(&mux->lock);
  
        /* Check if the socket is reserved so someone is waiting for sending. */
        kcm = psock->tx_kcm;
 -      if (kcm)
 +      if (kcm && !unlikely(kcm->tx_stopped))
                queue_work(kcm_wq, &kcm->tx_work);
  
        spin_unlock_bh(&mux->lock);
@@@ -1109,7 -1412,7 +1110,7 @@@ static int kcm_recvmsg(struct socket *s
        struct kcm_sock *kcm = kcm_sk(sk);
        int err = 0;
        long timeo;
 -      struct kcm_rx_msg *rxm;
 +      struct strp_rx_msg *rxm;
        int copied = 0;
        struct sk_buff *skb;
  
  
        /* Okay, have a message on the receive queue */
  
 -      rxm = kcm_rx_msg(skb);
 +      rxm = strp_rx_msg(skb);
  
        if (len > rxm->full_len)
                len = rxm->full_len;
@@@ -1179,7 -1482,7 +1180,7 @@@ static ssize_t kcm_splice_read(struct s
        struct sock *sk = sock->sk;
        struct kcm_sock *kcm = kcm_sk(sk);
        long timeo;
 -      struct kcm_rx_msg *rxm;
 +      struct strp_rx_msg *rxm;
        int err = 0;
        ssize_t copied;
        struct sk_buff *skb;
  
        /* Okay, have a message on the receive queue */
  
 -      rxm = kcm_rx_msg(skb);
 +      rxm = strp_rx_msg(skb);
  
        if (len > rxm->full_len)
                len = rxm->full_len;
@@@ -1372,6 -1675,15 +1373,6 @@@ static void init_kcm_sock(struct kcm_so
        spin_unlock_bh(&mux->rx_lock);
  }
  
 -static void kcm_rx_msg_timeout(unsigned long arg)
 -{
 -      struct kcm_psock *psock = (struct kcm_psock *)arg;
 -
 -      /* Message assembly timed out */
 -      KCM_STATS_INCR(psock->stats.rx_msg_timeouts);
 -      kcm_abort_rx_psock(psock, ETIMEDOUT, NULL);
 -}
 -
  static int kcm_attach(struct socket *sock, struct socket *csock,
                      struct bpf_prog *prog)
  {
        struct kcm_psock *psock = NULL, *tpsock;
        struct list_head *head;
        int index = 0;
 -
 -      if (csock->ops->family != PF_INET &&
 -          csock->ops->family != PF_INET6)
 -              return -EINVAL;
 +      struct strp_callbacks cb;
 +      int err;
  
        csk = csock->sk;
        if (!csk)
                return -EINVAL;
  
 -      /* Only support TCP for now */
 -      if (csk->sk_protocol != IPPROTO_TCP)
 -              return -EINVAL;
 -
        psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
        if (!psock)
                return -ENOMEM;
        psock->sk = csk;
        psock->bpf_prog = prog;
  
 -      setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout,
 -                  (unsigned long)psock);
 +      cb.rcv_msg = kcm_rcv_strparser;
 +      cb.abort_parser = NULL;
 +      cb.parse_msg = kcm_parse_func_strparser;
 +      cb.read_sock_done = kcm_read_sock_done;
  
 -      INIT_WORK(&psock->rx_work, psock_rx_work);
 -      INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work);
 +      err = strp_init(&psock->strp, csk, &cb);
 +      if (err) {
 +              kmem_cache_free(kcm_psockp, psock);
 +              return err;
 +      }
  
        sock_hold(csk);
  
        psock->save_write_space = csk->sk_write_space;
        psock->save_state_change = csk->sk_state_change;
        csk->sk_user_data = psock;
 -      csk->sk_data_ready = psock_tcp_data_ready;
 -      csk->sk_write_space = psock_tcp_write_space;
 -      csk->sk_state_change = psock_tcp_state_change;
 +      csk->sk_data_ready = psock_data_ready;
 +      csk->sk_write_space = psock_write_space;
 +      csk->sk_state_change = psock_state_change;
        write_unlock_bh(&csk->sk_callback_lock);
  
        /* Finished initialization, now add the psock to the MUX. */
        spin_unlock_bh(&mux->lock);
  
        /* Schedule RX work in case there are already bytes queued */
 -      queue_work(kcm_wq, &psock->rx_work);
 +      strp_check_rcv(&psock->strp);
  
        return 0;
  }
@@@ -1478,8 -1791,6 +1479,8 @@@ static void kcm_unattach(struct kcm_pso
        struct sock *csk = psock->sk;
        struct kcm_mux *mux = psock->mux;
  
 +      lock_sock(csk);
 +
        /* Stop getting callbacks from TCP socket. After this there should
         * be no way to reserve a kcm for this psock.
         */
        csk->sk_data_ready = psock->save_data_ready;
        csk->sk_write_space = psock->save_write_space;
        csk->sk_state_change = psock->save_state_change;
 -      psock->rx_stopped = 1;
 +      strp_stop(&psock->strp);
  
        if (WARN_ON(psock->rx_kcm)) {
                write_unlock_bh(&csk->sk_callback_lock);
  
        write_unlock_bh(&csk->sk_callback_lock);
  
 -      del_timer_sync(&psock->rx_msg_timer);
 -      cancel_work_sync(&psock->rx_work);
 -      cancel_delayed_work_sync(&psock->rx_delayed_work);
 +      /* Call strp_done without sock lock */
 +      release_sock(csk);
 +      strp_done(&psock->strp);
 +      lock_sock(csk);
  
        bpf_prog_put(psock->bpf_prog);
  
 -      kfree_skb(psock->rx_skb_head);
 -      psock->rx_skb_head = NULL;
 -
        spin_lock_bh(&mux->lock);
  
        aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
 +      save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
  
        KCM_STATS_INCR(mux->stats.psock_unattach);
  
@@@ -1564,8 -1876,6 +1565,8 @@@ no_reserved
                fput(csk->sk_socket->file);
                kmem_cache_free(kcm_psockp, psock);
        }
 +
 +      release_sock(csk);
  }
  
  static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
  
                spin_unlock_bh(&mux->lock);
  
 +              /* Lower socket lock should already be held */
                kcm_unattach(psock);
  
                err = 0;
@@@ -1721,7 -2030,7 +1722,7 @@@ static int kcm_ioctl(struct socket *soc
                        if (copy_to_user((void __user *)arg, &info,
                                         sizeof(info))) {
                                err = -EFAULT;
-                               sock_release(newsock);
+                               sys_close(info.fd);
                        }
                }
  
@@@ -1764,8 -2073,6 +1765,8 @@@ static void release_mux(struct kcm_mux 
        aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
        aggregate_psock_stats(&mux->aggregate_psock_stats,
                              &knet->aggregate_psock_stats);
 +      aggregate_strp_stats(&mux->aggregate_strp_stats,
 +                           &knet->aggregate_strp_stats);
        list_del_rcu(&mux->kcm_mux_list);
        knet->count--;
        mutex_unlock(&knet->mutex);
@@@ -1845,13 -2152,6 +1846,13 @@@ static int kcm_release(struct socket *s
         * it will just return.
         */
        __skb_queue_purge(&sk->sk_write_queue);
 +
 +      /* Set tx_stopped. This is checked when psock is bound to a kcm and we
 +       * get a writespace callback. This prevents further work being queued
 +       * from the callback (unbinding the psock occurs after canceling work).
 +       */
 +      kcm->tx_stopped = 1;
 +
        release_sock(sk);
  
        spin_lock_bh(&mux->lock);
diff --combined net/xfrm/xfrm_policy.c
@@@ -49,7 -49,6 +49,7 @@@ static struct xfrm_policy_afinfo __rcu 
                                                __read_mostly;
  
  static struct kmem_cache *xfrm_dst_cache __read_mostly;
 +static __read_mostly seqcount_t xfrm_policy_hash_generation;
  
  static void xfrm_init_pmtu(struct dst_entry *dst);
  static int stale_bundle(struct dst_entry *dst);
@@@ -60,11 -59,6 +60,11 @@@ static void __xfrm_policy_link(struct x
  static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
                                                int dir);
  
 +static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
 +{
 +      return atomic_inc_not_zero(&policy->refcnt);
 +}
 +
  static inline bool
  __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
  {
@@@ -391,11 -385,9 +391,11 @@@ static struct hlist_head *policy_hash_b
        __get_hash_thresh(net, family, dir, &dbits, &sbits);
        hash = __sel_hash(sel, family, hmask, dbits, sbits);
  
 -      return (hash == hmask + 1 ?
 -              &net->xfrm.policy_inexact[dir] :
 -              net->xfrm.policy_bydst[dir].table + hash);
 +      if (hash == hmask + 1)
 +              return &net->xfrm.policy_inexact[dir];
 +
 +      return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
 +                   lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
  }
  
  static struct hlist_head *policy_hash_direct(struct net *net,
        __get_hash_thresh(net, family, dir, &dbits, &sbits);
        hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
  
 -      return net->xfrm.policy_bydst[dir].table + hash;
 +      return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
 +                   lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
  }
  
  static void xfrm_dst_hash_transfer(struct net *net,
@@@ -435,14 -426,14 +435,14 @@@ redo
                h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
                                pol->family, nhashmask, dbits, sbits);
                if (!entry0) {
 -                      hlist_del(&pol->bydst);
 -                      hlist_add_head(&pol->bydst, ndsttable+h);
 +                      hlist_del_rcu(&pol->bydst);
 +                      hlist_add_head_rcu(&pol->bydst, ndsttable + h);
                        h0 = h;
                } else {
                        if (h != h0)
                                continue;
 -                      hlist_del(&pol->bydst);
 -                      hlist_add_behind(&pol->bydst, entry0);
 +                      hlist_del_rcu(&pol->bydst);
 +                      hlist_add_behind_rcu(&pol->bydst, entry0);
                }
                entry0 = &pol->bydst;
        }
@@@ -477,32 -468,22 +477,32 @@@ static void xfrm_bydst_resize(struct ne
        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
        unsigned int nhashmask = xfrm_new_hash_mask(hmask);
        unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
 -      struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
        struct hlist_head *ndst = xfrm_hash_alloc(nsize);
 +      struct hlist_head *odst;
        int i;
  
        if (!ndst)
                return;
  
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      write_seqcount_begin(&xfrm_policy_hash_generation);
 +
 +      odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
 +                              lockdep_is_held(&net->xfrm.xfrm_policy_lock));
  
        for (i = hmask; i >= 0; i--)
                xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
  
 -      net->xfrm.policy_bydst[dir].table = ndst;
 +      rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
        net->xfrm.policy_bydst[dir].hmask = nhashmask;
  
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      write_seqcount_end(&xfrm_policy_hash_generation);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +
 +      synchronize_rcu();
  
        xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
  }
@@@ -519,7 -500,7 +519,7 @@@ static void xfrm_byidx_resize(struct ne
        if (!nidx)
                return;
  
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
  
        for (i = hmask; i >= 0; i--)
                xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
        net->xfrm.policy_byidx = nidx;
        net->xfrm.policy_idx_hmask = nhashmask;
  
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  
        xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
  }
@@@ -560,6 -541,7 +560,6 @@@ static inline int xfrm_byidx_should_res
  
  void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
  {
 -      read_lock_bh(&net->xfrm.xfrm_policy_lock);
        si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
        si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
        si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
        si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
        si->spdhcnt = net->xfrm.policy_idx_hmask;
        si->spdhmcnt = xfrm_policy_hashmax;
 -      read_unlock_bh(&net->xfrm.xfrm_policy_lock);
  }
  EXPORT_SYMBOL(xfrm_spd_getinfo);
  
@@@ -617,7 -600,7 +617,7 @@@ static void xfrm_hash_rebuild(struct wo
                rbits6 = net->xfrm.policy_hthresh.rbits6;
        } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
  
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
  
        /* reset the bydst and inexact table in all directions */
        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
  
        /* re-insert all policies by order of creation */
        list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+               if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+                       /* skip socket policies */
+                       continue;
+               }
                newpos = NULL;
                chain = policy_hash_bysel(net, &policy->selector,
                                          policy->family,
                        hlist_add_head(&policy->bydst, chain);
        }
  
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  
        mutex_unlock(&hash_resize_mutex);
  }
@@@ -770,7 -757,7 +774,7 @@@ int xfrm_policy_insert(int dir, struct 
        struct hlist_head *chain;
        struct hlist_node *newpos;
  
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
        delpol = NULL;
        newpos = NULL;
                    xfrm_sec_ctx_match(pol->security, policy->security) &&
                    !WARN_ON(delpol)) {
                        if (excl) {
 -                              write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +                              spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                return -EEXIST;
                        }
                        delpol = pol;
        policy->curlft.use_time = 0;
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  
        if (delpol)
                xfrm_policy_kill(delpol);
@@@ -837,7 -824,7 +841,7 @@@ struct xfrm_policy *xfrm_policy_bysel_c
        struct hlist_head *chain;
  
        *err = 0;
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = policy_hash_bysel(net, sel, sel->family, dir);
        ret = NULL;
        hlist_for_each_entry(pol, chain, bydst) {
                                *err = security_xfrm_policy_delete(
                                                                pol->security);
                                if (*err) {
 -                                      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +                                      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                        return pol;
                                }
                                __xfrm_policy_unlink(pol, dir);
                        break;
                }
        }
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  
        if (ret && delete)
                xfrm_policy_kill(ret);
@@@ -878,7 -865,7 +882,7 @@@ struct xfrm_policy *xfrm_policy_byid(st
                return NULL;
  
        *err = 0;
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = net->xfrm.policy_byidx + idx_hash(net, id);
        ret = NULL;
        hlist_for_each_entry(pol, chain, byidx) {
                                *err = security_xfrm_policy_delete(
                                                                pol->security);
                                if (*err) {
 -                                      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +                                      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                        return pol;
                                }
                                __xfrm_policy_unlink(pol, dir);
                        break;
                }
        }
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  
        if (ret && delete)
                xfrm_policy_kill(ret);
@@@ -956,7 -943,7 +960,7 @@@ int xfrm_policy_flush(struct net *net, 
  {
        int dir, err = 0, cnt = 0;
  
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
  
        err = xfrm_policy_flush_secctx_check(net, type, task_valid);
        if (err)
                        if (pol->type != type)
                                continue;
                        __xfrm_policy_unlink(pol, dir);
 -                      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +                      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                        cnt++;
  
                        xfrm_audit_policy_delete(pol, 1, task_valid);
  
                        xfrm_policy_kill(pol);
  
 -                      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +                      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
                        goto again1;
                }
  
                                if (pol->type != type)
                                        continue;
                                __xfrm_policy_unlink(pol, dir);
 -                              write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +                              spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                cnt++;
  
                                xfrm_audit_policy_delete(pol, 1, task_valid);
                                xfrm_policy_kill(pol);
  
 -                              write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +                              spin_lock_bh(&net->xfrm.xfrm_policy_lock);
                                goto again2;
                        }
                }
        if (!cnt)
                err = -ESRCH;
  out:
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        return err;
  }
  EXPORT_SYMBOL(xfrm_policy_flush);
@@@ -1026,7 -1013,7 +1030,7 @@@ int xfrm_policy_walk(struct net *net, s
        if (list_empty(&walk->walk.all) && walk->seq != 0)
                return 0;
  
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        if (list_empty(&walk->walk.all))
                x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
        else
        }
        list_del_init(&walk->walk.all);
  out:
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        return error;
  }
  EXPORT_SYMBOL(xfrm_policy_walk);
@@@ -1073,9 -1060,9 +1077,9 @@@ void xfrm_policy_walk_done(struct xfrm_
        if (list_empty(&walk->walk.all))
                return;
  
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
        list_del(&walk->walk.all);
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  }
  EXPORT_SYMBOL(xfrm_policy_walk_done);
  
@@@ -1113,24 -1100,17 +1117,24 @@@ static struct xfrm_policy *xfrm_policy_
        struct xfrm_policy *pol, *ret;
        const xfrm_address_t *daddr, *saddr;
        struct hlist_head *chain;
 -      u32 priority = ~0U;
 +      unsigned int sequence;
 +      u32 priority;
  
        daddr = xfrm_flowi_daddr(fl, family);
        saddr = xfrm_flowi_saddr(fl, family);
        if (unlikely(!daddr || !saddr))
                return NULL;
  
 -      read_lock_bh(&net->xfrm.xfrm_policy_lock);
 -      chain = policy_hash_direct(net, daddr, saddr, family, dir);
 +      rcu_read_lock();
 + retry:
 +      do {
 +              sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
 +              chain = policy_hash_direct(net, daddr, saddr, family, dir);
 +      } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
 +
 +      priority = ~0U;
        ret = NULL;
 -      hlist_for_each_entry(pol, chain, bydst) {
 +      hlist_for_each_entry_rcu(pol, chain, bydst) {
                err = xfrm_policy_match(pol, fl, type, family, dir);
                if (err) {
                        if (err == -ESRCH)
                }
        }
        chain = &net->xfrm.policy_inexact[dir];
 -      hlist_for_each_entry(pol, chain, bydst) {
 +      hlist_for_each_entry_rcu(pol, chain, bydst) {
                if ((pol->priority >= priority) && ret)
                        break;
  
                }
        }
  
 -      xfrm_pol_hold(ret);
 +      if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
 +              goto retry;
 +
 +      if (ret && !xfrm_pol_hold_rcu(ret))
 +              goto retry;
  fail:
 -      read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      rcu_read_unlock();
  
        return ret;
  }
@@@ -1247,9 -1223,10 +1251,9 @@@ static struct xfrm_policy *xfrm_sk_poli
                                                 const struct flowi *fl)
  {
        struct xfrm_policy *pol;
 -      struct net *net = sock_net(sk);
  
        rcu_read_lock();
 -      read_lock_bh(&net->xfrm.xfrm_policy_lock);
 + again:
        pol = rcu_dereference(sk->sk_policy[dir]);
        if (pol != NULL) {
                bool match = xfrm_selector_match(&pol->selector, fl,
                        err = security_xfrm_policy_lookup(pol->security,
                                                      fl->flowi_secid,
                                                      policy_to_flow_dir(dir));
 -                      if (!err)
 -                              xfrm_pol_hold(pol);
 +                      if (!err && !xfrm_pol_hold_rcu(pol))
 +                              goto again;
                        else if (err == -ESRCH)
                                pol = NULL;
                        else
                        pol = NULL;
        }
  out:
 -      read_unlock_bh(&net->xfrm.xfrm_policy_lock);
        rcu_read_unlock();
        return pol;
  }
@@@ -1297,7 -1275,7 +1301,7 @@@ static struct xfrm_policy *__xfrm_polic
  
        /* Socket policies are not hashed. */
        if (!hlist_unhashed(&pol->bydst)) {
 -              hlist_del(&pol->bydst);
 +              hlist_del_rcu(&pol->bydst);
                hlist_del(&pol->byidx);
        }
  
@@@ -1321,9 -1299,9 +1325,9 @@@ int xfrm_policy_delete(struct xfrm_poli
  {
        struct net *net = xp_net(pol);
  
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        pol = __xfrm_policy_unlink(pol, dir);
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
        if (pol) {
                xfrm_policy_kill(pol);
                return 0;
@@@ -1342,7 -1320,7 +1346,7 @@@ int xfrm_sk_policy_insert(struct sock *
                return -EINVAL;
  #endif
  
 -      write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        old_pol = rcu_dereference_protected(sk->sk_policy[dir],
                                lockdep_is_held(&net->xfrm.xfrm_policy_lock));
        if (pol) {
                 */
                xfrm_sk_policy_unlink(old_pol, dir);
        }
 -      write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  
        if (old_pol) {
                xfrm_policy_kill(old_pol);
@@@ -1390,9 -1368,9 +1394,9 @@@ static struct xfrm_policy *clone_policy
                newp->type = old->type;
                memcpy(newp->xfrm_vec, old->xfrm_vec,
                       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
 -              write_lock_bh(&net->xfrm.xfrm_policy_lock);
 +              spin_lock_bh(&net->xfrm.xfrm_policy_lock);
                xfrm_sk_policy_link(newp, dir);
 -              write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +              spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
                xfrm_pol_put(newp);
        }
        return newp;
@@@ -3074,7 -3052,7 +3078,7 @@@ static int __net_init xfrm_net_init(str
  
        /* Initialize the per-net locks here */
        spin_lock_init(&net->xfrm.xfrm_state_lock);
 -      rwlock_init(&net->xfrm.xfrm_policy_lock);
 +      spin_lock_init(&net->xfrm.xfrm_policy_lock);
        mutex_init(&net->xfrm.xfrm_cfg_mutex);
  
        return 0;
@@@ -3108,7 -3086,6 +3112,7 @@@ static struct pernet_operations __net_i
  void __init xfrm_init(void)
  {
        register_pernet_subsys(&xfrm_net_ops);
 +      seqcount_init(&xfrm_policy_hash_generation);
        xfrm_input_init();
  }
  
@@@ -3206,7 -3183,7 +3210,7 @@@ static struct xfrm_policy *xfrm_migrate
        struct hlist_head *chain;
        u32 priority = ~0U;
  
 -      read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
 +      spin_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
        hlist_for_each_entry(pol, chain, bydst) {
                if (xfrm_migrate_selector_match(sel, &pol->selector) &&
  
        xfrm_pol_hold(ret);
  
 -      read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 +      spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  
        return ret;
  }